<a href="https://colab.research.google.com/github/jiraiyam/Kaggle-projects-/blob/main/Tenperature_EDA_and_Foresting_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'anomliot:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F1436528%2F2379377%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240921%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240921T231330Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D164a3609c66fa1d87deaf3d6bcd4093f2e46ab7fbb17289d0561556244c61d47ace34a5f262d9a7a35cada98b7065464ff1f1d32bf9aa4c4461df1932cb9ca1cbcbad407c88bad657ce6bba684f8096334acd19e7aa4e1517b53f0ba39a1376c40f344b9fef68fa6d049f5ce59100a94e1097ccf1f387f91f3e0cce1639df92aeaaaf60647d7ff23f604dbedfe28199e1c456490264a0015d50293684b0b77d5ad37f14d33c21f1ae61f55ee6a21bb9f6e3f203ecd0d1e022b5258828c85d7abaf098f6e7deb6ada46bb99ee0b5acd5faeafe71c2a9b25550aef0d38d75f15afeb40ba207af023b44e2dc372d565bc848a78da4012b3e3ed060c38c69c824c8c'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
# Start from a clean input directory: drop any previous /kaggle/input tree
# (errors are ignored), then (re)create the input and working directories.
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

# Expose the input directory as ../input relative to the current working
# directory. An already-existing link is left alone so the cell can be re-run.
try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
# Same for the working directory, exposed as ../working.
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every "<directory>:<url-encoded URL>" entry of DATA_SOURCE_MAPPING
# into KAGGLE_INPUT_PATH/<directory> and unpack it (zip or tar archive).
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # The URL part is percent-encoded, so the first ':' is the separator.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # Content-Length can be missing; without it only the byte count
            # is shown instead of a progress bar.
            total_bytes = int(total_length) if total_length else 0
            dl = 0
            while True:
                data = fileres.read(CHUNK_SIZE)
                if not data:
                    break
                dl += len(data)
                tfile.write(data)
                if total_bytes:
                    done = min(50, int(50 * dl / total_bytes))
                    sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                else:
                    sys.stdout.write(f"\r{dl} bytes downloaded")
                sys.stdout.flush()
            # Make sure every byte is on the file object and rewind it so the
            # archive readers start at the beginning.
            tfile.flush()
            tfile.seek(0)
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Read from the open file object (no reopen by name) and do
                # not rebind the name `tarfile`, which is the module itself.
                with tarfile.open(fileobj=tfile) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from pylab import rcParams
import warnings
import seaborn as sns
rcParams["figure.figsize"]=(30,18)
plt.rcParams['figure.dpi'] = 300
plt.rcParams['font.family'] = 'Arial'
plt.rcParams['font.weight'] = 'bold'
plt.rcParams['font.size'] = 15
warnings.filterwarnings("ignore")
warnings.simplefilter(action='ignore', category=FutureWarning)
import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

In [None]:
df=pd.read_csv("/kaggle/input/anomliot/dataset_final.csv")
df

In [None]:
df.info()

In [None]:
df.isnull().sum()

In [None]:
df['Time'] = pd.to_datetime(df['Time'], unit='s')

In [None]:
df.set_index('Time', inplace=True)


In [None]:
summary_stats = df.describe()
summary_stats

In [None]:
df

# EDA

In [None]:
# One histogram (with KDE overlay) per sensor column on a 3x2 grid.
histogram_specs = [
    ('Temperature', 'skyblue'),
    ('Humidity', 'olive'),
    ('Air Quality', 'gold'),
    ('Light', 'teal'),
    ('Loudness', 'purple'),
]

fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 10))
axes = axes.flatten()

for ax, (column, color) in zip(axes, histogram_specs):
    sns.histplot(df[column], kde=True, ax=ax, color=color)
    ax.set_title(f'{column} Distribution')

# Five columns on six axes: remove the unused last panel.
fig.delaxes(axes[5])

plt.tight_layout()
plt.show()

In [None]:
# Pairwise correlation of all columns, drawn as an annotated heatmap.
corr_matrix = df.corr()

fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', linewidths=0.5, ax=ax)
ax.set_title('Correlation Matrix Heatmap')
plt.show()


In [None]:
# Line plot of each sensor column against the DataFrame index on a 3x2 grid.
series_specs = [
    ('Temperature', 'skyblue'),
    ('Humidity', 'olive'),
    ('Air Quality', 'gold'),
    ('Light', 'teal'),
    ('Loudness', 'purple'),
]

fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(15, 10))
axes = axes.flatten()

for ax, (column, color) in zip(axes, series_specs):
    df[column].plot(ax=ax, color=color)
    ax.set_title(f'{column} over Time')

# Five columns on six axes: remove the unused last panel.
fig.delaxes(axes[5])

plt.tight_layout()
plt.show()

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
import plotly.express as px
import plotly.graph_objects as go
import statsmodels.api as sm

In [None]:
decompose_temp = seasonal_decompose(df['Temperature'], model='additive', period=60)  # Adjust the period based on your data frequency
decompose_humid = seasonal_decompose(df['Humidity'], model='additive', period=60)

In [None]:
decompose_temp.plot()
plt.suptitle('\n Seasonal Decomposition of Temperature', fontsize=16)
plt.show()

In [None]:
decompose_humid.plot()
plt.suptitle('\n Seasonal Decomposition of Humidity', fontsize=16)
plt.show()


In [None]:
from scipy import stats
numerical_columns = ['Temperature', 'Humidity', 'Air Quality', 'Light', 'Loudness']

plt.figure(figsize=(15, 8))
for i, col in enumerate(numerical_columns, 1):
    plt.subplot(2, 3, i)
    sns.boxplot(data=df[col])
    plt.title(f'Boxplot of {col}')
plt.tight_layout()
plt.show()

In [None]:
# Positional indices of rows whose |z-score| exceeds the threshold, per column.
threshold = 3  # Z-score threshold (greater than 3 means potential outlier)

outliers = {
    col: np.where(np.abs(stats.zscore(df[col])) > threshold)[0]
    for col in numerical_columns
}

In [None]:
for col, outlier_indices in outliers.items():
    print(f'Number of outliers in {col}: {len(outlier_indices)}')

In [None]:
# Count missing values per column and print the result.
missing_values = df.isnull().sum()
print("\nMissing Values in Each Column:")
print(missing_values)

# Report the dtype of every column. No conversion happens here: 'Time' was
# already parsed with pd.to_datetime and moved to the index in earlier cells.
print("\nData Types of Each Column:")
print(df.dtypes)

In [None]:
def cap_outliers(df, column, threshold=3):
    """Return a copy of `df` with outliers in `column` capped (winsorized).

    A value is an outlier when its z-score magnitude exceeds `threshold`.
    Such values are replaced by the nearest boundary
    ``mean +/- threshold * std``, expressed in the column's own units. The
    population standard deviation (ddof=0) is used, matching the default of
    ``scipy.stats.zscore``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is not modified.
    column : str
        Name of the numeric column to cap.
    threshold : float, default 3
        Maximum allowed absolute z-score.

    Returns
    -------
    pandas.DataFrame
        Copy of `df` with `column` capped.
    """
    values = df[column]
    center = values.mean()
    spread = values.std(ddof=0)

    capped_df = df.copy()
    # A constant, empty or all-NaN column has no usable z-scores: nothing to cap.
    if not spread > 0:
        return capped_df

    # Cap in data units; writing +/- threshold itself would store a z-score
    # in place of the measurement.
    capped_df[column] = values.clip(
        lower=center - threshold * spread,
        upper=center + threshold * spread,
    )
    return capped_df

for col in numerical_columns:
    df = cap_outliers(df, col)


In [None]:
sns.pairplot(df)
plt.suptitle("Pairplot of Variables", y=1.02)
plt.show()

In [None]:
# Filled kernel density estimate for four of the sensor columns on a 2x2 grid.
kde_specs = [
    ('Temperature', 'blue'),
    ('Humidity', 'green'),
    ('Light', 'purple'),
    ('Loudness', 'red'),
]

fig, axes = plt.subplots(2, 2, figsize=(15, 10))
axes = axes.flatten()

for ax, (column, color) in zip(axes, kde_specs):
    sns.kdeplot(df[column], ax=ax, color=color, fill=True)
    ax.set_title(f'{column} KDE')

plt.tight_layout()
plt.show()

In [None]:
# Violin plot of each sensor column's distribution on a 3x2 grid.
violin_specs = [
    ('Temperature', 'skyblue'),
    ('Humidity', 'green'),
    ('Air Quality', 'gold'),
    ('Light', 'purple'),
    ('Loudness', 'red'),
]

fig, axes = plt.subplots(3, 2, figsize=(15, 10))
axes = axes.flatten()

for ax, (column, color) in zip(axes, violin_specs):
    sns.violinplot(y=column, data=df, ax=ax, color=color)
    ax.set_title(f'{column} Violin Plot')

# Five columns on six axes: remove the unused last panel.
fig.delaxes(axes[5])

plt.tight_layout()
plt.show()

In [None]:
# Rolling mean / standard deviation of Temperature over a 30-row window,
# stored as new columns and plotted together with the raw series.
# NOTE(review): window=30 counts rows, not calendar days; the "30-Day" labels
# are only accurate if the data has one row per day - confirm.
temperature_window = df['Temperature'].rolling(window=30)
df['Temperature_Mean_30D'] = temperature_window.mean()
df['Temperature_Std_30D'] = temperature_window.std()

plt.figure(figsize=(10, 6))
line_specs = [
    ('Temperature', 'Temperature', 'skyblue'),
    ('Temperature_Mean_30D', '30-Day Rolling Mean', 'green'),
    ('Temperature_Std_30D', '30-Day Rolling Std', 'red'),
]
for column, label, color in line_specs:
    df[column].plot(label=label, color=color)
plt.title('Temperature with 30-Day Rolling Mean and Standard Deviation')
plt.legend(bbox_to_anchor=(1, 1), loc='upper left')
plt.show()

In [None]:
from mpl_toolkits.mplot3d import Axes3D


In [None]:
sns.pairplot(df, kind='reg', diag_kind='kde', corner=True)
plt.suptitle("Pairplot with Regression Lines", y=1.02)
plt.show()


In [None]:
# Temperature vs Humidity scatter, one facet per distinct value of 'Light'.
# NOTE(review): FacetGrid creates a panel for every unique 'Light' value; if
# the column is continuous this can mean a very large number of panels -
# consider binning it first (e.g. pd.qcut). Confirm against the data.
g = (
    sns.FacetGrid(df, col="Light", col_wrap=3)
    .map(sns.scatterplot, "Temperature", "Humidity")
    .add_legend()
)
plt.suptitle("FacetGrid of Temperature vs Humidity, Grouped by Light Levels", y=1.02)
plt.show()


In [None]:
fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(df['Temperature'], df['Humidity'], df['Loudness'], c='b', marker='o')
ax.set_xlabel('Temperature')
ax.set_ylabel('Humidity')
ax.set_zlabel('Loudness')
plt.title('3D Scatter Plot of Temperature, Humidity, and Loudness')
plt.show()

In [None]:
# Overlay the filled KDE curves of four sensor columns on a single axes.
plt.figure(figsize=(12, 8))
kde_columns = ['Temperature', 'Humidity', 'Light', 'Loudness']
for column in kde_columns:
    sns.kdeplot(df[column], fill=True, label=column, alpha=0.7)
plt.legend(loc='upper right')
plt.title('Ridge Plot for Temperature, Humidity, Light, and Loudness', fontsize=16)
plt.show()

In [None]:
plt.figure(figsize=(8, 6))
plt.hexbin(df['Temperature'], df['Humidity'], gridsize=30, cmap='Blues')
cb = plt.colorbar(label='Count in bin')
plt.title('Hexbin Plot of Temperature vs Humidity')
plt.xlabel('Temperature')
plt.ylabel('Humidity')
plt.show()

In [None]:
# n x n panel matrix: KDE of each feature on the diagonal, regression plot of
# row feature (x) against column feature (y) everywhere else.
features = ['Temperature', 'Humidity', 'Air Quality', 'Light', 'Loudness']

n = len(features)
fig, axes = plt.subplots(n, n, figsize=(20, 20))

for i, x_feature in enumerate(features):
    for j, y_feature in enumerate(features):
        panel = axes[i, j]
        if i == j:
            sns.kdeplot(df[x_feature], ax=panel, fill=True)
            panel.set_title(f'{x_feature} Distribution')
            continue
        sns.regplot(x=x_feature, y=y_feature, data=df, ax=panel, color='blue')
        panel.set_title(f'{x_feature} vs {y_feature}')

plt.tight_layout()
plt.show()

In [None]:
plt.figure(figsize=(8, 6))
plt.hexbin(df['Temperature'], df['Humidity'], gridsize=30, mincnt=5, cmap='Blues')
cb = plt.colorbar(label='Count in bin')
plt.title('Hexbin Plot of Temperature vs Humidity (Min Count = 5)', fontsize=14, weight='bold')
plt.xlabel('Temperature', fontsize=12, weight='bold')
plt.ylabel('Humidity', fontsize=12, weight='bold')
plt.show()



In [None]:
plt.figure(figsize=(8, 6))
plt.hexbin(df['Temperature'], df['Humidity'], gridsize=30, cmap='Blues', alpha=0.5)
plt.scatter(df['Temperature'], df['Humidity'], color='red', alpha=0.1, s=10)
cb = plt.colorbar(label='Count in bin')
plt.title('Hexbin Plot of Temperature vs Humidity with Overlay Scatter', fontsize=14, weight='bold')
plt.xlabel('Temperature', fontsize=12, weight='bold')
plt.ylabel('Humidity', fontsize=12, weight='bold')
plt.show()


In [None]:
# Temperature vs Humidity hexbin, one facet per distinct 'Air Quality' value.
# NOTE(review): one panel is created per unique 'Air Quality' value; if the
# column is continuous this can produce a very large grid - confirm.
g = (
    sns.FacetGrid(df, col='Air Quality', col_wrap=3, height=4)
    .map(plt.hexbin, 'Temperature', 'Humidity', gridsize=30, cmap='Blues', mincnt=5)
    .add_legend(title='Air Quality')
)
plt.subplots_adjust(top=0.9)
plt.show()


In [None]:
sns.kdeplot(x='Temperature', y='Humidity', data=df, fill=True, cmap='Blues', thresh=0, levels=20)
plt.title('Contour Plot of Temperature vs Humidity', fontsize=14, weight='bold')
plt.xlabel('Temperature', fontsize=12, weight='bold')
plt.ylabel('Humidity', fontsize=12, weight='bold')
plt.show()

In [None]:
plt.figure(figsize=(8, 6))
sns.kdeplot(x='Temperature', y='Humidity', data=df, fill=True, cmap='Blues', thresh=0)
plt.title('2D Density Plot of Temperature vs Humidity', fontsize=14, weight='bold')
plt.xlabel('Temperature', fontsize=12, weight='bold')
plt.ylabel('Humidity', fontsize=12, weight='bold')
plt.show()

In [None]:
import plotly.express as px

In [None]:
# 3D bar chart of the normalised 2D histogram of Temperature vs Humidity.
# NOTE(review): the title says "KDE" but this is a binned histogram density.
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
x = df['Temperature']
y = df['Humidity']
hist, xedges, yedges = np.histogram2d(x, y, bins=30, density=True)

# Anchor every bar at the lower-left corner of its bin.
xpos, ypos = np.meshgrid(xedges[:-1], yedges[:-1], indexing="ij")
xpos = xpos.ravel()
ypos = ypos.ravel()
zpos = np.zeros_like(xpos)

# Bar footprint equals the actual bin size (bins are equal-width), so bars
# tile the x/y plane whatever the range of the data.
dx = np.full_like(xpos, xedges[1] - xedges[0])
dy = np.full_like(ypos, yedges[1] - yedges[0])
dz = hist.ravel()

# Shade each bar by its height; a bare `cmap` keyword has no effect here
# because no scalar array is attached to the bars.
peak = dz.max() if dz.max() > 0 else 1.0
bar_colors = plt.cm.Blues(dz / peak)

ax.bar3d(xpos, ypos, zpos, dx, dy, dz, zsort='average', color=bar_colors)
ax.set_xlabel('Temperature')
ax.set_ylabel('Humidity')
ax.set_zlabel('Density')
plt.title('3D KDE of Temperature and Humidity', fontsize=14)
plt.show()

In [None]:
df['Hour'] = df.index.hour
df['Day'] = df.index.day_name()


In [None]:
df

In [None]:
plt.figure(figsize=(14, 6))
plt.plot(df.index, df['Temperature'], label='Temperature', color='orange')
plt.plot(df.index, df['Humidity'], label='Humidity', color='blue')
plt.title('Time Series of Temperature and Humidity')
plt.xlabel('Date')
plt.ylabel('Values')
plt.legend()
plt.grid()
plt.show()


In [None]:
# Mean temperature for every (weekday, hour) pair, weekdays as rows and hours
# as columns. Selecting the column before aggregating avoids averaging every
# other column only to discard the result.
daily_avg = df.groupby(['Day', 'Hour'])['Temperature'].mean().unstack()

# groupby sorts the day names alphabetically; put them back in calendar order
# (only the days actually present, so no empty rows are introduced).
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
daily_avg = daily_avg.reindex([day for day in weekday_order if day in daily_avg.index])

plt.figure(figsize=(12, 6))
sns.heatmap(daily_avg, cmap='coolwarm', annot=True)
plt.title('Average Temperature by Day and Hour')
plt.xlabel('Hour of Day')
plt.ylabel('Day of Week')
plt.show()


In [None]:
from statsmodels.graphics.tsaplots import plot_acf

# plot_acf opens a figure of its own unless it is given an Axes; passing one
# keeps the requested size and avoids leaving an empty figure behind.
fig, ax = plt.subplots(figsize=(12, 6))
plot_acf(df['Temperature'], lags=30, ax=ax)
ax.set_title('Autocorrelation Function for Temperature')
plt.show()


In [None]:
from statsmodels.graphics.tsaplots import plot_pacf

# plot_pacf opens a figure of its own unless it is given an Axes; passing one
# keeps the requested size and avoids leaving an empty figure behind.
fig, ax = plt.subplots(figsize=(12, 6))
plot_pacf(df['Temperature'], lags=30, ax=ax)
ax.set_title('Partial Autocorrelation Function for Temperature')
plt.show()


In [None]:
rolling_mean = df['Temperature'].rolling(window=30).mean()
rolling_std = df['Temperature'].rolling(window=30).std()

plt.figure(figsize=(14, 6))
plt.plot(df.index, df['Temperature'], label='Temperature', color='orange')
plt.plot(df.index, rolling_mean, label='Rolling Mean (30 days)', color='red')
plt.plot(df.index, rolling_std, label='Rolling Std (30 days)', color='blue')
plt.title('Temperature with Rolling Mean and Standard Deviation')
plt.xlabel('Date')
plt.ylabel('Temperature')
plt.legend()
plt.grid()
plt.show()


In [None]:
plt.figure(figsize=(12, 6))
sns.histplot(df['Temperature'], kde=True, bins=30, color='orange')
plt.title('Distribution of Temperature with KDE')
plt.xlabel('Temperature')
plt.ylabel('Frequency')
plt.show()


In [None]:
from pandas.plotting import lag_plot

# Scatter of each temperature reading against the next one (default lag=1).
plt.figure(figsize=(12, 6))
lag_plot(df['Temperature'])
plt.title('Lag Plot for Temperature')
plt.xlabel('Temperature(t)')
# lag_plot puts y(t) on the x-axis and y(t + lag) on the y-axis, so the
# vertical axis is the following reading, not the previous one.
plt.ylabel('Temperature(t+1)')
plt.grid()
plt.show()


# Forecasting

In [None]:
data = df[['Temperature', 'Humidity', 'Loudness']]

In [None]:
from statsmodels.tsa.stattools import adfuller

def adf_test(series):
    """Run the Augmented Dickey-Fuller test on `series` and print the result.

    Prints the test statistic and the p-value (the first two elements of the
    tuple returned by `adfuller`). Nothing is returned.
    """
    adf_statistic, p_value = adfuller(series)[:2]
    print(f'ADF Statistic: {adf_statistic}')
    print(f'p-value: {p_value}')

# Check each variable for stationarity
adf_test(data['Temperature'])
adf_test(data['Humidity'])
adf_test(data['Loudness'])


In [None]:
data_diff = data.diff().dropna()


In [None]:
from statsmodels.tsa.api import VAR

model = VAR(data_diff)
results = model.fit(maxlags=15, ic='aic')


In [None]:
print(results.summary())


In [None]:
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense


n_steps = 10
n_features = data.shape[1]

def create_sequences(data, n_steps):
    """Slice a 2-D array into overlapping windows and next-step targets.

    For every start row ``s`` with ``s + n_steps < len(data)`` the window is
    ``data[s:s + n_steps, :]`` and the target is column 0 of the row that
    follows the window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` where ``X`` stacks the windows and ``y`` the targets. Both
        are empty arrays when `data` has no more than `n_steps` rows.
    """
    window_count = len(data) - n_steps
    windows = [data[start:start + n_steps, :] for start in range(window_count)]
    targets = [data[start + n_steps, 0] for start in range(window_count)]
    return np.array(windows), np.array(targets)

# Build (window, next-temperature) pairs from the selected columns.
X, y = create_sequences(data.values, n_steps)

# Chronological hold-out: the last 20% of the windows form the test set.
# The default shuffle=True would mix future windows into the training data
# (overlapping windows leak the targets) and would scramble the time order
# that the later test-set plots rely on.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Single LSTM layer followed by a one-unit regression head.
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

model.fit(X_train, y_train, epochs=200, verbose=0)

# Predicted next-step temperature for every test window, shape (n_test, 1).
predictions = model.predict(X_test)



In [None]:
# Label the test samples with the real timestamps of the last len(y_test) rows
# instead of a synthetic minute-spaced range (the 'T' frequency alias is also
# deprecated in pandas). This is only meaningful when the test split is the
# chronological tail of the series.
time_index = data.index[-len(y_test):]
comparison_df = pd.DataFrame({'Actual': y_test, 'Predicted': predictions.flatten()}, index=time_index)

plt.figure(figsize=(14, 6))
plt.plot(comparison_df.index, comparison_df['Actual'], label='Actual Temperature', color='orange', alpha=0.7)
plt.plot(comparison_df.index, comparison_df['Predicted'], label='Predicted Temperature', color='blue', alpha=0.7)
plt.title('LSTM Predictions vs Actual Values')
plt.xlabel('Date')
plt.ylabel('Temperature')
plt.legend()
plt.grid()
plt.show()


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [None]:
# Error metrics on the held-out samples; flatten the (n, 1) model output once
# so every consumer below works with a 1-D array.
predicted = predictions.flatten()
mae = mean_absolute_error(y_test, predicted)
rmse = np.sqrt(mean_squared_error(y_test, predicted))
r2 = r2_score(y_test, predicted)

# Use the real timestamps of the last len(y_test) rows rather than a synthetic
# minute-spaced range (the 'T' frequency alias is deprecated in pandas). Only
# meaningful when the test split is the chronological tail of the series.
time_index = data.index[-len(y_test):]

comparison_df = pd.DataFrame({'Actual': y_test, 'Predicted': predicted}, index=time_index)

# Figure 1: predictions against actual values, metrics in the title.
plt.figure(figsize=(14, 6))
plt.plot(comparison_df.index, comparison_df['Actual'], label='Actual Temperature', color='orange', alpha=0.7)
plt.plot(comparison_df.index, comparison_df['Predicted'], label='Predicted Temperature', color='blue', alpha=0.7)

plt.title(f'LSTM Predictions vs Actual Values\nMAE: {mae:.2f}, RMSE: {rmse:.2f}, R²: {r2:.2f}')
plt.xlabel('Date')
plt.ylabel('Temperature')
plt.legend()
plt.grid()

# Figure 2: residuals (actual - predicted) over time.
residuals = comparison_df['Actual'] - comparison_df['Predicted']
plt.figure(figsize=(14, 6))
plt.plot(comparison_df.index, residuals, color='red', label='Residuals', alpha=0.5)
plt.axhline(0, color='black', linestyle='--')
plt.title('Residuals of Predictions')
plt.xlabel('Date')
plt.ylabel('Residuals')
plt.legend()
plt.grid()

# Annotate every residual larger than 5 in magnitude. Iterating over the
# filtered Series covers the first sample too and avoids integer-position
# lookups on a datetime-indexed Series.
significant_errors = residuals[residuals.abs() > 5]
for timestamp, error in significant_errors.items():
    plt.annotate('Significant Error',
                 xy=(timestamp, error),
                 xytext=(timestamp, error + 2),
                 arrowprops=dict(facecolor='black', arrowstyle='->'))

plt.show()

In [None]:
import plotly.graph_objs as go

# Create traces
actual_trace = go.Scatter(x=comparison_df.index, y=comparison_df['Actual'], mode='lines', name='Actual Temperature', line=dict(color='orange'))
predicted_trace = go.Scatter(x=comparison_df.index, y=comparison_df['Predicted'], mode='lines', name='Predicted Temperature', line=dict(color='blue'))

# Create layout
layout = go.Layout(title='LSTM Predictions vs Actual Values',
                   xaxis_title='Date',
                   yaxis_title='Temperature',
                   hovermode='closest')

# Create a figure
fig = go.Figure(data=[actual_trace, predicted_trace], layout=layout)

# Show the plot
fig.show()


In [None]:
# Rolling mean and standard deviation of the actual test-set temperatures over
# a window of 24 consecutive rows.
# NOTE(review): the window counts rows, so the "24h" label below is only
# accurate if comparison_df has one row per hour - confirm the sampling rate.
rolling_mean = comparison_df['Actual'].rolling(window=24).mean()
rolling_std = comparison_df['Actual'].rolling(window=24).std()

# Raw series, its rolling mean, and a shaded band of +/- one rolling standard
# deviation around that mean.
plt.figure(figsize=(14, 6))
plt.plot(comparison_df.index, comparison_df['Actual'], label='Actual Temperature', color='orange', alpha=0.5)
plt.plot(comparison_df.index, rolling_mean, label='Rolling Mean (24h)', color='blue')
plt.fill_between(comparison_df.index, rolling_mean - rolling_std, rolling_mean + rolling_std, color='blue', alpha=0.1, label='Rolling Std Dev')
plt.title('Actual Temperature with Rolling Mean and Std Dev')
plt.xlabel('Date')
plt.ylabel('Temperature')
plt.legend()
plt.grid()
plt.show()


In [None]:
comparison_df.to_csv("Finall.csv"  , index=True)

In [None]:
df.to_csv("Data.csv"  , index=True)

In [None]:
df