In [69]:
import json
import requests
import plotly.express as px
import pandas as pd
import datetime
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
from sklearn.linear_model import LinearRegression

## Loading Data
Using opne-meteo for historical climate data
ERA5: Generated using Copernicus Climate Change Service information 2022.

In [70]:
url = 'https://archive-api.open-meteo.com/v1/archive?latitude=38.80&longitude=-77.05&start_date=1971-01-01&end_date=2023-03-05&daily=temperature_2m_max,temperature_2m_min,temperature_2m_mean,sunrise,sunset,shortwave_radiation_sum,precipitation_sum,snowfall_sum&timezone=America%2FNew_York&temperature_unit=fahrenheit&windspeed_unit=mph&precipitation_unit=inch'
response = requests.get(url)
response = json.loads(response.text)
print(response)

{'latitude': 38.800003, 'longitude': -77.0, 'generationtime_ms': 68.40503215789795, 'utc_offset_seconds': -14400, 'timezone': 'America/New_York', 'timezone_abbreviation': 'EDT', 'elevation': 12.0, 'daily_units': {'time': 'iso8601', 'temperature_2m_max': '°F', 'temperature_2m_min': '°F', 'temperature_2m_mean': '°F', 'sunrise': 'iso8601', 'sunset': 'iso8601', 'shortwave_radiation_sum': 'MJ/m²', 'precipitation_sum': 'inch', 'snowfall_sum': 'inch'}, 'daily': {'time': ['1971-01-01', '1971-01-02', '1971-01-03', '1971-01-04', '1971-01-05', '1971-01-06', '1971-01-07', '1971-01-08', '1971-01-09', '1971-01-10', '1971-01-11', '1971-01-12', '1971-01-13', '1971-01-14', '1971-01-15', '1971-01-16', '1971-01-17', '1971-01-18', '1971-01-19', '1971-01-20', '1971-01-21', '1971-01-22', '1971-01-23', '1971-01-24', '1971-01-25', '1971-01-26', '1971-01-27', '1971-01-28', '1971-01-29', '1971-01-30', '1971-01-31', '1971-02-01', '1971-02-02', '1971-02-03', '1971-02-04', '1971-02-05', '1971-02-06', '1971-02-07',

### Applying time transformations

In [71]:
df = pd.DataFrame.from_records(response['daily'])
df['time'] = df.apply(lambda x: pd.Timestamp(x['time']), axis = 1)
df['month'] = df.apply(lambda x: datetime.date.strftime(x['time'], '%m-%b'), axis = 1)
df['year'] = df.apply(lambda x: datetime.date.strftime(x['time'], '%Y'), axis = 1)
print(df.columns)

Index(['precipitation_sum', 'shortwave_radiation_sum', 'snowfall_sum',
       'sunrise', 'sunset', 'temperature_2m_max', 'temperature_2m_mean',
       'temperature_2m_min', 'time', 'month', 'year'],
      dtype='object')


## Plotting Univariates

In [36]:
# Temperature by month as a violin plot
fig = px.violin(df, x = 'month', y = 'temperature_2m_mean')
fig.update_layout(template="simple_white",  title="Monthly Temperature Distributions")
fig.add_hrect(y0=-10, y1=32, 
              annotation_text="Freezing Temperatures", annotation_position="top right",  
              annotation_font_size=11,
              annotation_font_color="White",
              fillcolor="blue", opacity=0.25, line_width=0)
fig.show()

In [45]:
# Precipitation by month as a violin plot
df['precipitation_chance'] = df.apply(lambda x: int(x['precipitation_sum'] > 0), axis = 1)
precipitation_df = df.groupby('month').apply(lambda x: x['precipitation_chance'].mean())
fig = px.line(precipitation_df)
fig.update_layout(template="simple_white", title="Monthly Precipitation Distributions")
fig.show()

In [5]:
# Snowfall by month as a violin plot
df['snowfall_chance'] = df.apply(lambda x: int(x['snowfall_sum'] > 0), axis = 1)
precipitation_df = df.groupby('month').apply(lambda x: x['snowfall_chance'].mean())
fig = px.line(precipitation_df)
fig.update_layout(template="simple_white", title="Monthly Precipitation Distributions")
fig.show()

In [40]:
# Sunlight radition by month as a violin plot
fig = px.violin(df, x = 'month', y = 'shortwave_radiation_sum')
fig.update_layout(template="simple_white", title="Monthly Radiation Distributions")
fig.show()

In [42]:
# Hours of sun by month as a line graph
df['sunrise'] = df.apply(lambda x: pd.Timestamp(x['sunrise']), axis = 1)
df['sunset'] = df.apply(lambda x: pd.Timestamp(x['sunset']), axis = 1)
df['sunlight_minutes'] = df.apply(lambda x: pd.Timedelta(x['sunset'] - x['sunrise']).seconds / 60.0, axis = 1)
fig = px.line(df, x = 'time', y = 'sunlight_minutes', )
fig.update_xaxes(type = 'date', range = [pd.Timestamp(year = 2022, month = 1, day = 1), pd.Timestamp(year = 2022, month = 12, day = 31)])
fig.update_layout(template="simple_white", title="Monthly Sunlight")
fig.show()

In [43]:
# Temperature over years with trend line
fig = px.scatter(df, x = 'time', y = 'temperature_2m_mean')
fig.update_layout(template="simple_white", title="Climate Change")
fig.show()

In [44]:
# Temperature over years analysis
temp_df = df.groupby('year').apply(lambda x: x['temperature_2m_mean'].mean())
temp_df.drop(temp_df.tail(1).index, inplace=True)
fig = px.scatter(temp_df, trendline="ols", trendline_color_override="red")
fig.update_layout(template="simple_white", title="Climate Change")
fig.show()

In [41]:
# Definition of last frost date: Light freeze: 29° to 32°F (-1.7° to 0°C)—tender plants are killed.
# From the Farmer's Almanac: Note that frost dates are only an estimate based on historical climate data and are not set in stone. 
# The probability of a frost occurring after the spring frost date or before the fall frost date is 30%, which means that there is still a chance of frost occurring before or after the given dates!
df['frost_day'] = df.apply(lambda x: int(x['temperature_2m_min'] <= 32), axis = 1)
days_with_frost = df.groupby(by = 'year').apply(lambda x: x['frost_day'].sum())
fig = px.scatter(days_with_frost)
lin_regr = LinearRegression()
lin_res = lin_regr.fit(np.array(days_with_frost.index).reshape(-1,1), np.array(days_with_frost.values))
lin_fit = lin_regr.predict(np.array(days_with_frost.index).reshape(-1,1))
fig.add_trace(
    go.Scatter(x = days_with_frost.index, y = lin_fit, name = 'Trendline', line = dict(color = 'Orange')),
    #secondary_y=True
)
fig.update_layout(template="simple_white", title="Climate Change")
fig.show()
print(lin_res.coef_)

[-0.15290703]


## Plotting Final Graphs
1. Violin plot of temperature with snowfall line by month, include freezing point horizontal rectangle
2. Scatter plot of temperatures by year with OLS trend-line for climate change
3. Violin plot of sun radiation with sunlight violin by month
4. Heat map for temperature by month and year
5. Precipitation and evapotranspiration by month 
6. Daily temperatures, min, mean, max by day (can filter by year), with a vertical line for last frost date

In [58]:
# Final graph 1
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Violin(x = df['month'], y = df['temperature_2m_mean'], name = 'Historical mean temperatures'),
    secondary_y=False
)
fig.add_trace(
    go.Line(x = precipitation_df.index, y = precipitation_df.values, name = 'Chance of snow fall'),
    secondary_y=True
)
fig.update_layout(template="simple_white",  title="Monthly Temperature Distributions")
fig.add_hrect(y0=-10, y1=32, 
              annotation_text="Freezing Temperatures", annotation_position="top right",  
              annotation_font_size=11,
              annotation_font_color="White",
              fillcolor="blue", opacity=0.25, line_width=0)
fig.show()


plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




In [68]:
# Final graph 2
#fig = make_subplots(specs=[[{"secondary_y": True}]])
fig = go.Figure()
df.dropna(inplace = True)
df = df.loc[df['year'] != '2023']
df['year'] = df.apply(lambda x: int(x['year']), axis = 1)
lin_regr = LinearRegression()
lin_res = lin_regr.fit(np.array(df['year']).reshape(-1,1), np.array(df['temperature_2m_mean']))
lin_fit = lin_regr.predict(np.array(df['year']).reshape(-1,1))
fig.add_trace(
    go.Scatter(x = df['year'], y = lin_fit, name = 'Trendline', line = dict(color = 'Orange')),
    #secondary_y=True
)
# Get averages for the daily temp by min, max, mean and per year
mean_temp_series = df.groupby('year').apply(lambda x: x['temperature_2m_mean'].mean())
fig.add_trace(
    go.Scatter(x = mean_temp_series.index, y = mean_temp_series.values, name = 'Average Temperatures', mode = "markers", marker = dict(size=8, color="Purple")),
    #secondary_y=True
)
max_temp_series = df.groupby('year').apply(lambda x: x['temperature_2m_max'].mean())
fig.add_trace(
    go.Scatter(x = max_temp_series.index, y = max_temp_series.values, name = 'Max Temperatures', mode = "markers", marker = dict(size=8, color="Red")),
    #secondary_y=True
)
min_temp_series = df.groupby('year').apply(lambda x: x['temperature_2m_min'].mean())
fig.add_trace(
    go.Scatter(x = min_temp_series.index, y = min_temp_series.values, name = 'Min Temperatures', mode = "markers", marker = dict(size=8, color="Blue")),
    #secondary_y=True
)
fig.update_layout(template="simple_white",  title="Yearly Temperature Distributions", yaxis_title="Mean Temperature in Fahrenheit", xaxis_title="Year", legend_title="Legend")
#print(mean_temp_series)
#print(max_temp_series)
#print(min_temp_series)
temp_range = [mean_temp_series.iloc[0]-10,mean_temp_series.iloc[0]+10]
print(temp_range)
fig.update_yaxes(range = temp_range)
fig.show()
print(lin_res.coef_)

[45.76986301369863, 65.76986301369863]


[0.03781888]


In [46]:
# Figure 3
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Violin(x = df['month'], y = df['shortwave_radiation_sum'], name = 'Historical radiation'),
    secondary_y=False
)
fig.add_trace(
    go.Violin(x = df['month'], y = df['sunlight_minutes'], name = 'Sunlight'),
    secondary_y=True
)
fig.update_layout(template="simple_white",  title="Monthly Radiation Distributions")
fig.show()

In [62]:
# Figure 4
fig = go.Figure()
heatmap = df.groupby(by = ['year','month']).apply(lambda x: x['temperature_2m_mean'].mean())
heatmap = heatmap.reset_index(name = 'values')
fig.add_trace(
    go.Heatmap(x = heatmap.year, y = heatmap.month, z = heatmap['values'], name = 'Historical radiation')
)
fig.show()