# Final Project

In [None]:
import pandas as pd
import seaborn as sns


## Temperature

In [None]:
# Load the temperature records. 'Date' is parsed with the '%Y%m' format into a
# timestamp, and the calendar year is split out into its own 'Year' column.
temperatures = (
    pd.read_csv("data/temperature.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'].astype(str), format='%Y%m'))
    .assign(Year=lambda frame: frame['Date'].dt.year)
)
display(temperatures)

In [None]:
# Mean of 'Value' for each 'Year', returned as a flat two-column frame
# ('Year', 'AvgTemp').
avg_temp = (
    temperatures
    .groupby('Year', as_index=False)['Value']
    .mean()
    .rename(columns={'Value': 'AvgTemp'})
)
display(avg_temp)

In [None]:
# Scatter plot of the 'Anomaly' column (x) against the 'Value' column (y).
temp_scatter = sns.scatterplot(
    x="Anomaly",
    y="Value",
    data=temperatures,
)
display(temp_scatter)

In [None]:
# Line plot of the yearly mean temperature ('AvgTemp') over 'Year'.
temp_timeseries = sns.lineplot(
    x='Year',
    y='AvgTemp',
    data=avg_temp,
)
display(temp_timeseries)

## Wildfires

In [None]:
# Load the wildfire records and parse 'Date' with the '%Y%m' format into a timestamp.
wildfires = (
    pd.read_csv("data/wildfire.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'].astype(str), format='%Y%m'))
)
display(wildfires)

In [None]:
# Tag every record with its calendar year, then total 'Acres Burned' per year.
# NOTE: despite its name, `avg_fires` holds yearly sums, not averages.
wildfires['Year'] = wildfires['Date'].dt.year
avg_fires = wildfires.groupby('Year', as_index=False)['Acres Burned'].sum()
display(avg_fires)

In [None]:
# Line plot of the yearly 'Acres Burned' totals over 'Year'.
fires_timeseries = sns.lineplot(
    x='Year',
    y='Acres Burned',
    data=avg_fires,
)
display(fires_timeseries)

In [None]:
# Scatter plot of 'Number of Fires' (x) against 'Acres Burned' (y), one point per record.
wildfire_scatter = sns.scatterplot(
    x='Number of Fires',
    y='Acres Burned',
    data=wildfires,
)
display(wildfire_scatter)

In [None]:
# Line plot of 'Number of Fires' over the parsed 'Date' column.
wildfire_lineplot_nof = sns.lineplot(
    x='Date',
    y='Number of Fires',
    data=wildfires,
)
display(wildfire_lineplot_nof)

In [None]:
# Line plot of 'Acres Burned' over the parsed 'Date' column.
wildfire_lineplot_acres_burned = sns.lineplot(
    x='Date',
    y='Acres Burned',
    data=wildfires,
)
display(wildfire_lineplot_acres_burned)

## Precipitation

In [None]:
# Load the precipitation records. 'Date' is parsed with the '%Y%m' format into a
# timestamp, and the calendar year is split out into its own 'Year' column.
precipitation = (
    pd.read_csv("data/precipitation.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'].astype(str), format='%Y%m'))
    .assign(Year=lambda frame: frame['Date'].dt.year)
)
display(precipitation)

In [None]:
# Mean of 'Value' for each 'Year', returned as a flat two-column frame
# ('Year', 'AvgPrec').
avg_prec = (
    precipitation
    .groupby('Year', as_index=False)['Value']
    .mean()
    .rename(columns={'Value': 'AvgPrec'})
)
display(avg_prec)

In [None]:
# Line plot of the yearly mean precipitation ('AvgPrec') over 'Year'.
prec_timeseries = sns.lineplot(
    x='Year',
    y='AvgPrec',
    data=avg_prec,
)
display(prec_timeseries)

## Hurricanes

In [None]:
# Load the hurricane observations and derive a monthly-period 'Date' column.
# The separate 'year' / 'month' / 'day' columns are joined into a zero-padded
# 'YYYY-MM-DD' string, parsed, and then reduced to month resolution.
hurricane = pd.read_csv("data/hurricane.csv").assign(
    Date=lambda frame: pd.to_datetime(
        frame['year'].astype(str)
        + "-" + frame['month'].astype(str).str.zfill(2)
        + "-" + frame['day'].astype(str).str.zfill(2),
        format="%Y-%m-%d",
    ).dt.to_period('M')
)
display(hurricane)

In [None]:
# Scatter plot of 'wind' (x) against 'pressure' (y), coloured by the 'status' column.
hurricane_scatter_status = sns.scatterplot(
    x='wind',
    y='pressure',
    hue='status',
    data=hurricane,
)
display(hurricane_scatter_status)

In [None]:
# Scatter plot of 'wind' (x) against 'pressure' (y), coloured by the 'category' column.
# NOTE: rows whose 'category' is NA are not accounted for in this plot.
hurricane_scatter_category = sns.scatterplot(
    x='wind',
    y='pressure',
    hue='category',
    data=hurricane,
)
display(hurricane_scatter_category)

In [None]:
# Load the tornado records; 'Date' is parsed with the '%Y%m' format and stored
# as a monthly period.
tornadoes = (
    pd.read_csv("data/tornado.csv")
    .assign(
        Date=lambda frame: pd.to_datetime(
            frame['Date'].astype(str), format='%Y%m'
        ).dt.to_period('M')
    )
)
display(tornadoes)

In [None]:
# Load data/1950-2023_actual_tornadoes.csv. The lowercase 'date' column is parsed
# as YYYY-MM-DD and stored, at month resolution, in a new 'Date' column.
new_tornadoes = pd.read_csv("data/1950-2023_actual_tornadoes.csv")
new_tornadoes['Date'] = pd.to_datetime(new_tornadoes['date'], format="%Y-%m-%d").dt.to_period('M')
# Show the frame loaded in this cell.
display(new_tornadoes)

In [None]:
# Load the earthquake records and keep only the rows whose COUNTRY is "USA"
# and whose LONGITUDE is negative (western hemisphere).
earthquake = pd.read_csv("data/eqint_tsqp.csv")
earthquake_usa = earthquake[(earthquake.COUNTRY == "USA") & (earthquake.LONGITUDE < 0)]
# Show the filtered frame built in this cell.
display(earthquake_usa)

# Maps

In [None]:
import pandas as pd
import geopandas
import matplotlib.pyplot as plt
from geodatasets import get_path

## Earthquake

In [None]:
# Turn the US earthquake records into a GeoDataFrame with one point per row,
# built from the LONGITUDE (x) and LATITUDE (y) columns in EPSG:4326.
gdf = geopandas.GeoDataFrame(
    earthquake_usa,
    geometry=geopandas.points_from_xy(earthquake_usa.LONGITUDE, earthquake_usa.LATITUDE),
    crs="EPSG:4326",
)
# Use the notebook's rich display (as the other cells do) rather than a
# plain-text print of the frame.
display(gdf.head())

In [None]:
# Base layer: the Natural Earth land polygons.
world = geopandas.read_file(get_path("naturalearth.land"))

# Clip the base layer to the bounding box (minx, miny, maxx, maxy) =
# (-200, 10, -50, 72), which covers North America, and draw it as an outline.
ax = world.clip([-200, 10, -50, 72]).plot(edgecolor="black", color="white")

# Overlay the points held in `gdf` on the same axes.
gdf.plot(color="red", markersize=1, ax=ax)

plt.show()

## Hurricane

In [None]:
# Point is no longer used in this cell but stays imported so that any other
# cell relying on it keeps working.
from shapely.geometry import LineString, Point

# Step 1: Select the storms to draw
# Only observations from years after 2018 are considered.
hurricane = hurricane[hurricane['year'] > 2018]

# Names of storms that have at least one observation with status 'hurricane'.
hurricane_storms = hurricane.loc[hurricane['status'] == 'hurricane', 'name'].unique()

# Keep every observation of those storms (including the ones taken before or
# after the storm had hurricane status). The explicit copy makes the result an
# independent frame rather than a slice of the filtered data.
hurricane = hurricane[hurricane['name'].isin(hurricane_storms)].copy()

# Step 2: Create a GeoDataFrame
# One point per observation, built from the 'long' (x) and 'lat' (y) columns.
gdf = geopandas.GeoDataFrame(
    hurricane,
    geometry=geopandas.points_from_xy(hurricane['long'], hurricane['lat']),
    crs='EPSG:4326',
)

# Step 3: Group by hurricane name and create paths
# Observations are sorted chronologically once; groupby keeps that row order
# inside every group, so each path is drawn in time order.
paths = []

for name, group in gdf.sort_values(by=['year', 'month', 'day', 'hour']).groupby('name'):
    # A LineString needs at least two points, so a storm with a single
    # observation cannot form a path and is skipped.
    if len(group) < 2:
        continue

    paths.append({'name': name, 'geometry': LineString(group.geometry.tolist())})

# Convert the paths list into a GeoDataFrame
paths_gdf = geopandas.GeoDataFrame(paths, crs='EPSG:4326')

# Step 4: Plot the map
# Base map clipped to the bounding box (minx, miny, maxx, maxy) = (-110, 0, -10, 72).
world = geopandas.read_file(get_path("naturalearth.land"))
ax = world.clip([-110, 0, -10, 72]).plot(color='lightgrey', edgecolor='white', figsize=(12, 10))

# Plot each hurricane's path, coloured by storm name. The per-storm legend is
# placed outside the axes so it does not cover the map.
paths_gdf.plot(
    ax=ax,
    column='name',
    legend=True,
    cmap='tab10',
    linewidth=2,
    legend_kwds={'title': 'Storm', 'loc': 'center left', 'bbox_to_anchor': (1.0, 0.5)},
)

# An axes holds only one "current" legend: the ax.legend() call below would
# replace the per-storm legend. Registering it as a plain artist first keeps
# it on the figure.
storm_legend = ax.get_legend()
if storm_legend is not None:
    ax.add_artist(storm_legend)

gdf.plot(ax=ax, color='black', markersize=10, label="Hurricane Points")

ax.set_title("Hurricane Paths")
# Second legend: only the labelled observation points.
ax.legend(loc='lower right')
plt.show()