In [18]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from plotly import express as px

df = pd.read_csv("texasWeatherData.csv")
print(df.head())

     Date  meanTemp  Anomaly
0  192401      41.0     -4.4
1  192402      47.1     -2.3
2  192403      50.7     -5.6
3  192404      63.6     -1.2
4  192405      68.9     -3.6


In [19]:
df.describe()

Unnamed: 0,Date,meanTemp,Anomaly
count,1202.0,1202.0,1202.0
mean,197364.894343,64.996423,0.383444
std,2892.730222,13.479564,2.680966
min,192401.0,36.6,-8.8
25%,194901.25,52.325,-1.2
50%,197401.5,65.75,0.3
75%,199901.75,78.0,2.075
max,202402.0,88.2,12.2


In [20]:
# we're going to need to turn date from int into datetime
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1202 entries, 0 to 1201
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Date      1202 non-null   int64  
 1   meanTemp  1202 non-null   float64
 2   Anomaly   1202 non-null   float64
dtypes: float64(2), int64(1)
memory usage: 28.3 KB


In [21]:
df["Date"] = pd.to_datetime(df["Date"], format = '%Y%m')
df['year'] = df['Date'].dt.year
df["month"] = df["Date"].dt.month
print(df.head())
df.info()

        Date  meanTemp  Anomaly  year  month
0 1924-01-01      41.0     -4.4  1924      1
1 1924-02-01      47.1     -2.3  1924      2
2 1924-03-01      50.7     -5.6  1924      3
3 1924-04-01      63.6     -1.2  1924      4
4 1924-05-01      68.9     -3.6  1924      5
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1202 entries, 0 to 1201
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   Date      1202 non-null   datetime64[ns]
 1   meanTemp  1202 non-null   float64       
 2   Anomaly   1202 non-null   float64       
 3   year      1202 non-null   int32         
 4   month     1202 non-null   int32         
dtypes: datetime64[ns](1), float64(2), int32(2)
memory usage: 37.7 KB


In [22]:
forecast_data = df.rename(columns = {"Date": "ds", 
                                       "meanTemp": "y"})
print(forecast_data)

             ds     y  Anomaly  year  month
0    1924-01-01  41.0     -4.4  1924      1
1    1924-02-01  47.1     -2.3  1924      2
2    1924-03-01  50.7     -5.6  1924      3
3    1924-04-01  63.6     -1.2  1924      4
4    1924-05-01  68.9     -3.6  1924      5
...         ...   ...      ...   ...    ...
1197 2023-10-01  68.5      2.4  2023     10
1198 2023-11-01  56.5      1.9  2023     11
1199 2023-12-01  51.8      4.9  2023     12
1200 2024-01-01  44.3     -1.1  2024      1
1201 2024-02-01  56.3      6.9  2024      2

[1202 rows x 5 columns]


In [36]:
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
model = Prophet()
model.fit(forecast_data)
forecasts = model.make_future_dataframe(periods=2400)
predictions = model.predict(forecasts)
plot_plotly(model, predictions)

12:10:29 - cmdstanpy - INFO - Chain [1] start processing
12:10:29 - cmdstanpy - INFO - Chain [1] done processing


In [34]:
predictions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1322 entries, 0 to 1321
Data columns (total 16 columns):
 #   Column                      Non-Null Count  Dtype         
---  ------                      --------------  -----         
 0   ds                          1322 non-null   datetime64[ns]
 1   trend                       1322 non-null   float64       
 2   yhat_lower                  1322 non-null   float64       
 3   yhat_upper                  1322 non-null   float64       
 4   trend_lower                 1322 non-null   float64       
 5   trend_upper                 1322 non-null   float64       
 6   additive_terms              1322 non-null   float64       
 7   additive_terms_lower        1322 non-null   float64       
 8   additive_terms_upper        1322 non-null   float64       
 9   yearly                      1322 non-null   float64       
 10  yearly_lower                1322 non-null   float64       
 11  yearly_upper                1322 non-null   float64     

In [31]:
print(predictions.head(-5))

             ds      trend  yhat_lower  yhat_upper  trend_lower  trend_upper  \
0    1924-01-01  64.846669   41.903135   48.482400    64.846669    64.846669   
1    1924-02-01  64.847155   46.728968   53.284403    64.847155    64.847155   
2    1924-03-01  64.847610   53.749982   60.062804    64.847610    64.847610   
3    1924-04-01  64.848097   61.392464   67.770306    64.848097    64.848097   
4    1924-05-01  64.848568   69.177052   75.866323    64.848568    64.848568   
...         ...        ...         ...         ...          ...          ...   
1312 2024-05-22  66.971165   68.798457   75.603017    66.971165    66.971165   
1313 2024-05-23  66.971330   69.675080   76.386346    66.971330    66.971330   
1314 2024-05-24  66.971494   70.514396   77.136438    66.971494    66.971494   
1315 2024-05-25  66.971659   71.299527   77.647572    66.971659    66.971659   
1316 2024-05-26  66.971823   72.784993   79.064247    66.971823    66.971823   

      additive_terms  additive_terms_lo