In [None]:
import geopandas  as gpd
import numpy      as np
import pandas     as pd
import shapely    as shape
import os

# Report the version of every library this notebook relies on, so that any
# output below can be matched to a specific environment.
for label, module in (
    ("GeoPandas Version: ", gpd),
    ("Numpy Version: ", np),
    ("Pandas Version: ", pd),
    ("Shapely Version: ", shape),
):
    print(label, module.__version__)

# CREATING DATAFRAMES

In [None]:
# Build a point GeoDataFrame from the CSV file and export it as a shapefile.
cwd       = os.getcwd()
filepath  = os.path.join(cwd, 'geo_data.csv')

# read_csv already returns a DataFrame, so it is used directly.
df        = pd.read_csv(filepath)

# Convert DataFrame to GeoDataFrame: one point per row, with Longitude as x
# and Latitude as y. The coordinate reference system (CRS) is declared at
# construction time rather than assigned to the attribute afterwards.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df.Longitude, df.Latitude),
    crs="EPSG:4326",  # WGS 84
)

# Save the GeoDataFrame to a shapefile in the current working directory.
# NOTE: the Shapefile format limits field names to 10 characters, so any
# longer column name is normally truncated by the driver on write.
gdf.to_file('geo_data.shp')

print("Shapefile created successfully.")

#### From a Shapefile

In [None]:
# Load the shapefile and keep only its attribute table as a plain DataFrame.
cwd       = os.getcwd()
filepath  = os.path.join(cwd, 'shapefiles', 'geo_data.shp')
if not os.path.exists(filepath):
    # The export step of this notebook writes 'geo_data.shp' straight into the
    # working directory, so fall back to that file when no copy exists under
    # 'shapefiles/'. An existing 'shapefiles/geo_data.shp' still takes priority.
    filepath = os.path.join(cwd, 'geo_data.shp')
gdf       = gpd.read_file(filepath)
# Dropping the geometry column leaves just the tabular attributes.
df        = pd.DataFrame(gdf.drop(columns='geometry'))
print(df)

#### From a CSV File

In [None]:
# Load the CSV from the working directory and preview the first ten rows.
cwd       = os.getcwd()
filepath  = os.path.join(cwd, 'geo_data.csv')
# read_csv already returns a DataFrame; no extra pd.DataFrame(...) wrap is needed.
df        = pd.read_csv(filepath)
print(df.head(n=10))

#### From a JSON File

In [None]:
# Load the JSON file from the working directory and preview the first ten rows.
cwd       = os.getcwd()
filepath  = os.path.join(cwd, 'geo_data.json')
# With its default settings read_json returns a DataFrame, so it is used as-is.
df        = pd.read_json(filepath)
print(df.head(n=10))

#### From a GeoJSON File

In [None]:
# Using the geopandas module: read_file already returns a GeoDataFrame.
cwd       = os.getcwd()
filepath  = os.path.join(cwd, 'geo_data.geojson')
gdf       = gpd.read_file(filepath)

# Separate the 'geometry' column into individual 'Latitude' and 'Longitude' columns.
# NOTE: .x / .y are only defined for Point geometries - this assumes every
# feature in the file is a Point.
gdf['Latitude']   = gdf.geometry.y
gdf['Longitude']  = gdf.geometry.x

# Drop the 'geometry' column and keep a plain pandas DataFrame, the same way
# the shapefile cell does.
df                = pd.DataFrame(gdf.drop(columns='geometry'))
# Reorder columns to move 'Latitude' and 'Longitude' columns between 'Name' and 'LandUse' columns
df = df[['ID', 'Name', 'Latitude', 'Longitude', 'LandUse', 'Area_sq_km', 'Population', 'Elevation_m']]

print(df)

#### From a Dictionary

In [None]:
# Column-oriented construction: every dict key becomes a column, and the
# explicit index replaces the default 0..n-1 row labels.
df_1_columns = {
    "a": [4, 5, 6],
    "b": [7, 8, 9],
    "c": [10, 11, 12],
}
df_1_row_labels = [1, 2, 3]
df_1 = pd.DataFrame(data=df_1_columns, index=df_1_row_labels)

print(df_1)
# Draw df_1 once with each plot kind, in the same order as before.
# ("density" and "kde" both draw a kernel density estimate.)
for plot_kind in ("area", "bar", "barh", "box", "density", "kde", "line"):
    getattr(df_1.plot, plot_kind)()
# The pie chart is drawn with one subplot per column; it stays the final
# expression so the cell still shows its return value.
df_1.plot.pie(subplots=True)


In [None]:
# Three passengers; the second one has no recorded age.
passenger_names = [
    "Braund, Mr. Owen Harris",
    "Allen, Mr. William Henry",
    "Bonnell, Miss. Elizabeth",
]
passenger_ages = [22, None, 58]
passenger_sexes = ["male", "male", "female"]

df_4_data: dict = {
    "Name": passenger_names,
    "Age": passenger_ages,
    "Sex": passenger_sexes,
}

df = pd.DataFrame(data=df_4_data)
print(df)

print("\n")

# Selecting a single column by label yields a Series.
df_age = df["Age"]
print(df_age)

#### From a List of Lists

In [None]:
# Row-oriented construction: each inner list is one row, so the column and
# row labels have to be supplied separately.
df_2_rows = [[4, 7, 10], [5, 8, 11], [6, 9, 12]]
df_2 = pd.DataFrame(df_2_rows, index=[1, 2, 3], columns=list("abc"))

print(df_2)

#### Multi-Indexing

In [None]:
# MultiIndexing
# Pair each outer label (level "n") with its inner label (level "v") to get
# one index tuple per row.
outer_labels = ["d", "d", "e"]
inner_labels = [1, 2, 2]
df_3_index = pd.MultiIndex.from_tuples(
    list(zip(outer_labels, inner_labels)),
    names=["n", "v"],
)

df_3_data: dict = dict(a=[4, 5, 6], b=[7, 8, 9], c=[10, 11, 12])

table = pd.DataFrame(df_3_data, index=df_3_index)

print(table)

#### From dict of Series or dicts

In [None]:
# The two Series deliberately carry different indexes ("two" has an extra "d").
series_one = pd.Series({"a": 1.0, "b": 2.0, "c": 3.0})
series_two = pd.Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4.0})
d_4_data = {"one": series_one, "two": series_two}

# The frame's index is the union of both Series indexes; "one" has no value
# for "d", so that cell is NaN.
df_a = pd.DataFrame(d_4_data)
print(df_a)

print("\n")

# An explicit index / columns selects and reorders the data; "three" is not a
# key of d_4_data, so that column is entirely NaN.
df_b = pd.DataFrame(
    data=d_4_data,
    index=["d", "b", "a"],
    columns=["two", "three"],
)
print(df_b)

# CREATE A SERIES FROM SCRATCH

In [None]:
# A Series is a single labelled column; `name` sets its label.
age_values = [22, 35, 58]
ages = pd.Series(data=age_values, name="Age")
print(ages)

In [None]:
# Five random draws labelled "a" through "e". No seed is set, so the values
# differ from run to run.
s1 = pd.Series(data=np.random.randn(5), index=list("abcde"))
print(s1)

In [None]:
# Inspect the row labels attached to s1.
s1_index = s1.index
print(s1_index)

In [None]:
# With no index supplied, the Series gets the default 0..n-1 integer labels.
s2_values = np.random.randn(5)
s2 = pd.Series(data=s2_values)
print(s2)

In [None]:
# Series instantiated from dicts: the keys become the index and the values
# become the data, in the dict's own order.
d = dict(b=1, a=0, c=2)
s3 = pd.Series(data=d)
print(s3)

In [None]:
# When an index is passed together with a dict, the values for those labels
# are pulled out in the order given; "d" is not a key of `d`, so its entry is NaN.
s4_labels = ["b", "c", "d", "a"]
s4 = pd.Series(d, index=s4_labels)
print(s4)

# DO SOMETHING WITH THE DATAFRAME

In [None]:
# SCENARIO: I want to know the max age of the passengers
# (for a standalone Series the equivalent call is ages.max())
age_column = df["Age"]
pass_max_age = age_column.max()
print(pass_max_age)

In [None]:
# SCENARIO: I am interested in some basic statistics of the numerical data of my data table
overview_data = df.describe()
print(overview_data)

# Both mean() and count() leave out missing ages.
ages_in_table = df["Age"]

mean_age = ages_in_table.mean()
print("Mean Age: ", mean_age)

count_ages = ages_in_table.count()
print("Ages Count: ", count_ages)