In [1]:
# Importing libraries
import pandas as pd
import numpy as np
import math
import scipy.stats as stats
from sklearn.cluster import KMeans
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Building functions

def get_dataframe_name_as_string(dataframe):
    """Return the name of a module-level variable bound to ``dataframe``.

    The lookup is by object identity, so only a variable that references this
    exact object matches (an equal copy does not).

    Public names win over names starting with an underscore: interactive
    shells keep recent cell outputs in underscore-prefixed globals (``_``,
    ``_3``, ...), and those can alias a frame that was displayed earlier.

    The function's own locals are deliberately not searched. They always
    contain the ``dataframe`` parameter itself, so that lookup could only ever
    answer ``"dataframe"``.

    Parameters
    ----------
    dataframe : object
        The object whose variable name is wanted.

    Returns
    -------
    str or None
        The first matching global name, or ``None`` when no module-level
        variable references the object.
    """
    underscore_match = None
    # Iterate over a snapshot so the namespace cannot change size mid-scan.
    for name, value in list(globals().items()):
        if value is not dataframe:
            continue
        if not name.startswith("_"):
            return name
        if underscore_match is None:
            underscore_match = name
    return underscore_match
    
def normalize_column_values(dataframe, column_name):
    """Min-max scale one column of ``dataframe`` in place.

    Each value becomes ``(value - min) / (max - min)``, so the smallest value
    maps to 0 and the largest to 1.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame that is modified in place.
    column_name : str
        Name of the numeric column to rescale.

    Returns
    -------
    None
        The result is written back into ``dataframe[column_name]``.
    """
    column = dataframe[column_name]
    lowest = column.min()
    value_range = column.max() - lowest
    shifted = column - lowest

    if value_range == 0:
        # Constant column: dividing by the zero range would turn every value
        # into NaN, so keep the (all-zero) shifted values instead.
        dataframe[column_name] = shifted.astype(float)
    else:
        dataframe[column_name] = shifted / value_range

def save_dataframe_to_csv(dataframe, name=None, path_template=None):
    """Write ``dataframe`` to a CSV file (without the index).

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to save.
    name : str, optional
        File name without extension. When omitted, the name of the global
        variable bound to ``dataframe`` is used.
    path_template : str, optional
        Path containing one ``{}`` placeholder for the file name. When
        omitted, the project folder used throughout this notebook is used.

    Raises
    ------
    ValueError
        If no ``name`` is given and no variable name can be found for the
        frame (previously this would have produced a file called "None.csv").
    """
    if path_template is None:
        path_template = r"C:\Users\joaoa\Desktop\Ironhack\Labs\Mid_Bootcamp_Project\{}.csv"

    if name is None:
        name = get_dataframe_name_as_string(dataframe)
    if name is None:
        raise ValueError("Could not determine a file name for the dataframe; pass name= explicitly.")

    dataframe.to_csv(path_template.format(name), index = False)
    
def save_dataframes_into_excel_multiple_sheets(excel_name, *dataframes):
    """Write several dataframes into one Excel workbook, one sheet each.

    Sheets are named "Sheet1", "Sheet2", ... in the order the frames are
    passed.

    Parameters
    ----------
    excel_name : str
        Workbook file name without the ".xlsx" extension.
    *dataframes : pandas.DataFrame
        Frames to write, one per sheet.
    """
    directory = r"C:\Users\joaoa\Desktop\Ironhack\Labs\Mid_Bootcamp_Project\{}.xlsx"

    # The context manager closes the writer even if one of the
    # to_excel calls raises, so the file handle is never leaked.
    with pd.ExcelWriter(directory.format(excel_name), engine="xlsxwriter") as writer:
        for sheet_number, dataframe in enumerate(dataframes, start=1):
            dataframe.to_excel(writer, sheet_name = "Sheet" + str(sheet_number))

In [3]:
# The raw triple-quoted string below holds the one-off extraction step and is kept for reference only:
# as a bare string expression it is never executed when this cell runs.
# When it was run, it read the 1065 per-point csv documents (p00000.csv ... p01064.csv), discarded the
# points with null "max" values, and summarized the rest in two csv files ('df_local' and 'df_stats').
# Those two files are imported right after the string
# (files available in https://drive.google.com/drive/folders/1O5QWDHgI5I_A-qVXerTOskWlQ5LCJrkS?usp=sharing)
# NOTE(review): the padding expression "0"*(4-int(np.log10(p))) + str(p) looks equivalent to
# str(p).zfill(5) for these point numbers - confirm before re-enabling the code.


r'''
# Creating new dataframes
df_local = pd.DataFrame(columns = ["point", "latitude", "longitude"])
df_stats = pd.DataFrame(columns = ["point", "period", "year", "month", "day", "mean", "min", "max"])

# Setting the number of first and last point
first_point = 0
last_point = 1064

# Creating list with the two periods
list_period = []
for count_year in range(1980,2022):
    for count_day in range(0,365):
        list_period.append(int((count_year-1980)/21)+1)

# Importing localization and temperatures of each point
file = r"C:\Users\joaoa\Desktop\Ironhack\Labs\Mid_Bootcamp_Project\ERA5 t2m (in K)\t2m\p{}.csv"

# Importing the information from all 1065 points (1065 csv documents) at once
for p in range(first_point, last_point + 1):
    if p == 0:
        df_info = pd.read_csv(file.format("00000"), nrows = 3, header = None)
        df_point_day = pd.read_csv(file.format("00000"), sep = ",", skiprows = 3)
    else:
        df_info = pd.read_csv(file.format("0"*(4-int(np.log10(p))) + str(p)), nrows = 3, header = None)
        df_point_day = pd.read_csv(file.format("0"*(4-int(np.log10(p))) + str(p)), sep = ",", skiprows = 3)
    
    # Discarding points with null values (localizated in the sea)
    if df_point_day["max"].isna().sum() == 0:
        
        # Dealing with columns
        df_point_day[["year","month","day"]] = df_point_day["Unnamed: 0"].str.split("-", expand = True)
        df_point_day = df_point_day.drop(["Unnamed: 0","sum"], axis = 1)

        # Converting temperatures from Kelvin to Celsius
        df_point_day["mean"] = df_point_day["mean"] - 273.15
        df_point_day["min"] = df_point_day["min"] - 273.15
        df_point_day["max"] = df_point_day["max"] - 273.15

        # Converting date numbers to integers
        df_point_day["year"] = df_point_day["year"].astype("int")
        df_point_day["month"] = df_point_day["month"].astype("int")
        df_point_day["day"] = df_point_day["day"].astype("int")

        # Dropping February 29th, and year 2022
        df_point_day = df_point_day.drop(df_point_day[(df_point_day["year"] == 2022)].index)
        df_point_day = df_point_day.drop(df_point_day[(df_point_day["month"] == 2) & (df_point_day["day"] == 29)].index)
        df_point_day = df_point_day.reset_index(drop = True)

        # Creating new columns
        df_point_day.insert(0, "point", p)
        df_point_day.insert(1, "period", "")
        df_point_day["period"] = list_period

        # Creating a dataframe with all points
        df_stats = pd.concat([df_stats, df_point_day], axis = 0)
        
        # Importing point information
        df_info = df_info.transpose()

        for i in range(0,3):
            df_info.iloc[0,i] = df_info.iloc[0,i].lower().replace(":", "")

        df_info.columns = df_info.iloc[0]
        df_info = df_info.drop([0], axis = 0).reset_index(drop = True)
        df_info.iloc[0,0] = int(df_info.iloc[0,0])

        df_local = pd.concat([df_local, df_info], axis = 0)

# Reseting index
df_local = df_local.reset_index(drop = True)
df_stats = df_stats.reset_index(drop = True)

# Computing the difference between max and min temperatures
df_stats["max-min"] = df_stats["max"] - df_stats["min"]

# Computing the derivate of temperatures
df_stats.insert(6, "diff", "")
df_stats["diff"] = df_stats.groupby(["point"])["mean"].diff().fillna(0)

# Saving the dataframes to csv
save_dataframe_to_csv(df_local)
save_dataframe_to_csv(df_stats)
'''

# Importing "local" data (the latitude and longitude of each point)
local_csv_path = r"C:\Users\joaoa\Desktop\Ironhack\Labs\Mid_Bootcamp_Project\df_local.csv"
df_local = pd.read_csv(local_csv_path)
df_local

Unnamed: 0,point,latitude,longitude
0,0,42.2,-8.2
1,1,42.1,-8.6
2,2,42.1,-8.5
3,3,42.1,-8.4
4,4,42.1,-8.3
...,...,...,...
1007,1058,37.1,-7.8
1008,1059,37.1,-7.7
1009,1062,37.0,-8.0
1010,1063,37.0,-7.9


In [4]:
# Importing "stats" data (one row per point and day, with date and temperature information)
stats_csv_path = r"C:\Users\joaoa\Desktop\Ironhack\Labs\Mid_Bootcamp_Project\df_stats.csv"
df_stats = pd.read_csv(stats_csv_path)
df_stats

Unnamed: 0,point,period,year,month,day,mean,diff,min,max,max-min
0,0,1,1980,1,1,9.52050,0.00000,7.20010,11.22805,4.02795
1,0,1,1980,1,2,8.36395,-1.15655,6.74563,10.73098,3.98535
2,0,1,1980,1,3,6.79388,-1.57007,3.01278,12.19692,9.18414
3,0,1,1980,1,4,6.64060,-0.15328,2.32520,10.65957,8.33437
4,0,1,1980,1,5,5.38088,-1.25972,0.51370,8.66927,8.15557
...,...,...,...,...,...,...,...,...,...,...
15513955,1064,2,2021,12,27,17.29812,0.68682,16.89706,18.07168,1.17462
15513956,1064,2,2021,12,28,16.89282,-0.40530,15.51925,17.68917,2.16992
15513957,1064,2,2021,12,29,15.10040,-1.79242,13.57920,17.33120,3.75200
15513958,1064,2,2021,12,30,14.49874,-0.60166,12.30400,16.53320,4.22920


In [5]:
# Getting the statistics grouped by day (each value is averaged over all points for that date)
# The columns are selected with a list: tuple-style selection on a groupby is rejected by pandas 2.x.
# Calling .mean() directly with as_index=False returns flat columns (year, month, day, mean, min, max, max-min),
# so no MultiIndex clean-up is needed afterwards.
df_stats_by_day = df_stats.groupby(["year", "month", "day"], as_index=False)[["mean", "min", "max", "max-min"]].mean()

# Period 1 covers the years up to 2000, period 2 the years after 2000
df_stats_by_day.insert(0, "period", np.where(df_stats_by_day["year"] <= 2000, 1, 2))
df_stats_by_day

Unnamed: 0,period,year,month,day,mean,min,max,max-min
0,1,1980,1,1,11.512610,9.347775,13.900327,4.552552
1,1,1980,1,2,9.878900,6.612583,13.076219,6.463636
2,1,1980,1,3,7.604593,2.926605,13.338890,10.412285
3,1,1980,1,4,7.228851,2.590193,12.464369,9.874176
4,1,1980,1,5,8.438283,4.243818,12.308346,8.064527
...,...,...,...,...,...,...,...,...
15325,2,2021,12,27,13.954387,12.589720,15.954956,3.365236
15326,2,2021,12,28,13.624365,11.950860,15.807788,3.856929
15327,2,2021,12,29,13.213834,10.539834,16.001407,5.461573
15328,2,2021,12,30,11.229215,7.574822,16.757288,9.182466


In [6]:
# Starting a Cluster Analysis to group the months in 4 different classes


def cluster_months_for_period(period_stats, suffix, n_clusters = 4, random_state = 42):
    """Cluster the months of one period on their normalized "mean" and "diff" averages.

    Parameters
    ----------
    period_stats : pandas.DataFrame
        Rows of df_stats belonging to one period; needs the columns
        "month", "mean" and "diff".
    suffix : str
        Period tag inserted in the output column names ("1" or "2").
    n_clusters : int, optional
        Number of KMeans clusters.
    random_state : int, optional
        Seed passed to KMeans.

    Returns
    -------
    pandas.DataFrame
        One row per month with the columns "month", "mean<suffix>_norm",
        "diff<suffix>_norm" and "clusters<suffix>". Cluster numbers run from 1
        upwards in the order in which the clusters first appear through the
        months.
    """
    # Monthly averages of the two clustering variables (list selection works on every pandas version)
    monthly = period_stats.groupby("month", as_index=False)[["mean", "diff"]].mean()

    # Normalizing values
    normalize_column_values(monthly, "mean")
    normalize_column_values(monthly, "diff")

    # Clustering
    # n_init is set explicitly so the result does not depend on the installed
    # scikit-learn default (it moved from 10 to "auto" in scikit-learn 1.4).
    kmeans = KMeans(n_clusters = n_clusters, n_init = 10, random_state = random_state)
    raw_labels = kmeans.fit(monthly[["mean", "diff"]]).labels_

    # KMeans label ids are arbitrary: renumber them 1, 2, ... by first appearance
    # (months are already sorted by the groupby), without touching the fitted estimator.
    monthly["clusters"] = pd.factorize(raw_labels)[0] + 1

    return monthly.rename(columns = {
        "mean": "mean" + suffix + "_norm",
        "diff": "diff" + suffix + "_norm",
        "clusters": "clusters" + suffix,
    })


# Spliting the entire period in two small periods
period_1 = df_stats.loc[df_stats["year"] <= 2000]
period_2 = df_stats.loc[df_stats["year"] > 2000]

# One row per month: month, mean1_norm, diff1_norm, clusters1, mean2_norm, diff2_norm, clusters2
df_clusters = cluster_months_for_period(period_1, "1").merge(
    cluster_months_for_period(period_2, "2"), on = "month", how = "outer"
)

df_clusters

Unnamed: 0,month,mean1_norm,diff1_norm,clusters1,mean2_norm,diff2_norm,clusters2
0,1,0.0,0.58874,1,0.0,0.405916,1
1,2,0.086698,0.633236,1,0.053915,0.480168,1
2,3,0.255325,0.768081,2,0.202123,0.699126,1
3,4,0.328873,0.844006,2,0.347942,0.49372,1
4,5,0.519387,0.792144,2,0.566793,1.0,2
5,6,0.798793,1.0,2,0.823471,0.570912,3
6,7,1.0,0.639886,3,0.97336,0.559243,3
7,8,0.991978,0.382476,3,1.0,0.41011,3
8,9,0.835325,0.138935,3,0.83287,0.149963,3
9,10,0.531719,0.063018,4,0.561272,0.036233,4


In [7]:
# Updating the dataframe "df_stats_by_day" with the clusters information
# df_clusters stores month m at index label m - 1, so each day looks up its month's row through that label.
cluster_row = df_stats_by_day["month"] - 1
in_period_1 = df_stats_by_day["period"] == 1
in_period_2 = df_stats_by_day["period"] == 2

for target_column, period_1_column, period_2_column in [
    ("cluster", "clusters1", "clusters2"),
    ("mean_norm_cluster", "mean1_norm", "mean2_norm"),
    ("diff_norm_cluster", "diff1_norm", "diff2_norm"),
]:
    period_1_values = cluster_row.map(df_clusters[period_1_column]).astype(float)
    period_2_values = cluster_row.map(df_clusters[period_2_column]).astype(float)

    # Period-1 rows take the period-1 value, period-2 rows the period-2 value, anything else stays NaN
    df_stats_by_day[target_column] = period_1_values.where(in_period_1, period_2_values.where(in_period_2))

df_stats_by_day

Unnamed: 0,period,year,month,day,mean,min,max,max-min,cluster,mean_norm_cluster,diff_norm_cluster
0,1,1980,1,1,11.512610,9.347775,13.900327,4.552552,1.0,0.000000,0.588740
1,1,1980,1,2,9.878900,6.612583,13.076219,6.463636,1.0,0.000000,0.588740
2,1,1980,1,3,7.604593,2.926605,13.338890,10.412285,1.0,0.000000,0.588740
3,1,1980,1,4,7.228851,2.590193,12.464369,9.874176,1.0,0.000000,0.588740
4,1,1980,1,5,8.438283,4.243818,12.308346,8.064527,1.0,0.000000,0.588740
...,...,...,...,...,...,...,...,...,...,...,...
15325,2,2021,12,27,13.954387,12.589720,15.954956,3.365236,1.0,0.046178,0.442373
15326,2,2021,12,28,13.624365,11.950860,15.807788,3.856929,1.0,0.046178,0.442373
15327,2,2021,12,29,13.213834,10.539834,16.001407,5.461573,1.0,0.046178,0.442373
15328,2,2021,12,30,11.229215,7.574822,16.757288,9.182466,1.0,0.046178,0.442373


In [8]:
# Computing the quantile 0.90 of max temperatures (over every point and day)
max_temperatures = df_stats["max"]
quantile_90 = max_temperatures.quantile(q = 0.90)
print("0.90 quantile of max temperatures =", round(quantile_90, 5))

0.90 quantile of max temperatures = 30.49246


In [9]:
# Getting the number of extreme max temperatures, mean, and variance for each point and period
# Only the days whose max temperature exceeds the 0.90 quantile are kept.
# Named aggregation on a regular groupby returns flat, already-named columns;
# reset_index() then turns the group keys (point, period) back into columns.
extreme_days = df_stats[df_stats["max"] > quantile_90]
df_over_90 = (
    extreme_days.groupby(["point", "period"])["max"]
    .agg(count_over_90 = "count", mean_over_90 = "mean", var_over_90 = "var")
    .reset_index()
)

# Adding new columns relative to location
# Looked up by point id; validate= raises if df_local ever lists a point twice.
df_over_90 = df_over_90.merge(
    df_local[["point", "latitude", "longitude"]],
    on = "point",
    how = "left",
    validate = "many_to_one",
)
df_over_90 = df_over_90[["point", "period", "latitude", "longitude", "count_over_90", "mean_over_90", "var_over_90"]]

df_over_90

Unnamed: 0,point,period,latitude,longitude,count_over_90,mean_over_90,var_over_90
0,0,1,42.2,-8.2,154,32.047881,1.514551
1,0,2,42.2,-8.2,186,32.079369,1.678153
2,1,1,42.1,-8.6,88,31.771785,1.196684
3,1,2,42.1,-8.6,127,32.103152,1.543832
4,2,1,42.1,-8.5,165,31.958116,1.473559
...,...,...,...,...,...,...,...
2015,1062,2,37.0,-8.0,40,31.640407,1.051085
2016,1063,1,37.0,-7.9,24,31.405943,0.887930
2017,1063,2,37.0,-7.9,51,31.652694,1.237462
2018,1064,1,37.0,-7.8,32,31.435810,0.890437


In [10]:
# Getting the statistics grouped by point and period
df_stats_by_point = df_stats.groupby(["point", "period"], as_index=False)["mean"].mean()

# Adding new columns relative to location
# Matched on the point id instead of on row position, so the result no longer depends on
# every point having exactly two rows in the same order as df_local.
df_stats_by_point = df_stats_by_point.merge(
    df_local[["point", "latitude", "longitude"]],
    on = "point",
    how = "left",
    validate = "many_to_one",
)

# Adding new columns relative to extreme temperatures
df_stats_by_point = df_stats_by_point.merge(
    df_over_90[["point", "period", "count_over_90", "mean_over_90", "var_over_90"]],
    on = ["point", "period"],
    how = "left",
    validate = "one_to_one",
)

# Point/period pairs without any extreme day: the count is 0 and the mean falls back to the threshold itself.
# Their variance is left missing (NaN) instead of an empty string, which keeps the column numeric.
df_stats_by_point["count_over_90"] = df_stats_by_point["count_over_90"].fillna(0).astype("int64")
df_stats_by_point["mean_over_90"] = df_stats_by_point["mean_over_90"].fillna(quantile_90)

df_stats_by_point = df_stats_by_point[["point", "period", "latitude", "longitude", "mean", "count_over_90", "mean_over_90", "var_over_90"]]

df_stats_by_point

Unnamed: 0,point,period,latitude,longitude,mean,count_over_90,mean_over_90,var_over_90
0,0,1,42.2,-8.2,12.062662,154,32.047881,1.514551
1,0,2,42.2,-8.2,12.367129,186,32.079369,1.678153
2,1,1,42.1,-8.6,13.637483,88,31.771785,1.196684
3,1,2,42.1,-8.6,13.886673,127,32.103152,1.543832
4,2,1,42.1,-8.5,13.563556,165,31.958116,1.473559
...,...,...,...,...,...,...,...,...
2019,1062,2,37.0,-8.0,17.960456,40,31.640407,1.051085
2020,1063,1,37.0,-7.9,17.774283,24,31.405943,0.88793
2021,1063,2,37.0,-7.9,18.122078,51,31.652694,1.237462
2022,1064,1,37.0,-7.8,17.905242,32,31.43581,0.890437


In [11]:
# Getting the statistics of extreme occurences (days whose max temperature is above the 0.90 quantile)
is_extreme = df_stats["max"] > quantile_90
df_extreme_occurences = (
    df_stats.loc[is_extreme]
    .drop(columns = ["mean", "diff", "min", "max-min"])
    .reset_index(drop = True)
)
df_extreme_occurences

Unnamed: 0,point,period,year,month,day,max
0,0,1,1980,8,20,32.70248
1,0,1,1980,8,21,33.96273
2,0,1,1980,8,22,33.26782
3,0,1,1981,6,13,32.74404
4,0,1,1981,6,14,32.98660
...,...,...,...,...,...,...
1551366,1064,2,2020,8,1,31.95193
1551367,1064,2,2020,8,23,30.80840
1551368,1064,2,2021,8,14,30.66976
1551369,1064,2,2021,8,15,32.55690


In [12]:
# Creating a summary dataframe with all information needed for hypothesis testing
hypothesis_columns = ["point", "latitude", "longitude","count1","count2","mean1","mean2","var1","var2","t_value","p_value","number_of_occurences","intensity_of_occurences","intensity_in_number"]
df_hypothesis_test = df_local[["point", "latitude", "longitude"]].copy()

# Filling the fields "count", "mean", and "var"
# One left merge per period puts the period-1 and period-2 statistics of each point side by side.
for period_number in (1, 2):
    suffix = str(period_number)
    period_extremes = df_over_90.loc[
        df_over_90["period"] == period_number,
        ["point", "count_over_90", "mean_over_90", "var_over_90"],
    ].rename(columns = {
        "count_over_90": "count" + suffix,
        "mean_over_90": "mean" + suffix,
        "var_over_90": "var" + suffix,
    })
    df_hypothesis_test = df_hypothesis_test.merge(period_extremes, on = "point", how = "left")

# Points without extreme days in a period have a count of 0 (their mean and variance stay missing)
df_hypothesis_test["count1"] = df_hypothesis_test["count1"].fillna(0).astype("int64")
df_hypothesis_test["count2"] = df_hypothesis_test["count2"].fillna(0).astype("int64")

count1 = df_hypothesis_test["count1"]
count2 = df_hypothesis_test["count2"]
mean1 = df_hypothesis_test["mean1"]
mean2 = df_hypothesis_test["mean2"]

# Filling the fields "t_value" and "p_value"
# Pooled-variance two-sample t statistic, computed only where both periods have more than one extreme day:
#   t = (mean2 - mean1) * sqrt(n1*n2*(n1+n2-2)/(n1+n2)) / sqrt((n1-1)*var1 + (n2-1)*var2)
testable = df_hypothesis_test.loc[(count1 > 1) & (count2 > 1)]
n1 = testable["count1"].astype(float)  # floats keep the n1*n2*(n1+n2-2) product away from integer limits
n2 = testable["count2"].astype(float)
degrees_of_freedom = n1 + n2 - 2
pooled_sum_of_squares = (n1 - 1) * testable["var1"] + (n2 - 1) * testable["var2"]
t_statistic = (testable["mean2"] - testable["mean1"]) * np.sqrt(n1 * n2 * degrees_of_freedom / (n1 + n2)) / np.sqrt(pooled_sum_of_squares)

# Assigning a Series aligns on the index, so the rows that were not tested stay NaN.
df_hypothesis_test["t_value"] = t_statistic
# "p_value" holds the cumulative probability P(T <= t); values above 0.95 therefore mean a
# significant increase at the one-sided 5% level.
df_hypothesis_test["p_value"] = pd.Series(
    stats.t.cdf(t_statistic.to_numpy(), degrees_of_freedom.to_numpy()),
    index = t_statistic.index,
    dtype = float,
)

# Filling the field "number_of_occurences"
df_hypothesis_test["number_of_occurences"] = np.where((count2 > count1).to_numpy(), "Increased", "Did not increased")

# Filling the field "intensity_of_occurences"
# Conditions are checked in order and the first match wins; comparisons against missing values are False.
df_hypothesis_test["intensity_of_occurences"] = np.select(
    [
        (df_hypothesis_test["p_value"] > 0.95).to_numpy(),
        (mean2 > mean1).to_numpy(),
        ((count1 == 0) & (count2 > 0)).to_numpy(),
    ],
    ["Significantly increased", "Increased", "Increased"],
    default = "Did not increased",
)

# Filling the field "intensity_in_number"
# Each label is placed at 25%, 50% or 75% of the range of the mean extreme temperatures.
lowest_mean = df_over_90["mean_over_90"].min()
highest_mean = df_over_90["mean_over_90"].max()
range_fraction = {"Did not increased": 0.25, "Increased": 0.5, "Significantly increased": 0.75}
df_hypothesis_test["intensity_in_number"] = lowest_mean + df_hypothesis_test["intensity_of_occurences"].map(range_fraction) * (highest_mean - lowest_mean)

df_hypothesis_test = df_hypothesis_test[hypothesis_columns]

In [13]:
# Creating a chart with the Student's t-distribution
# n random draws from a t-distribution with n - 1 degrees of freedom (fixed seed), plus their cumulative probabilities
n = 10000
df = n - 1
student_list = stats.t.rvs(df, loc=0, scale=1, size=n, random_state = 42)
cumulative_probabilities = stats.t.cdf(student_list, df, loc=0, scale=1)

# Assigning description to the values (the first matching threshold wins)
descriptions = np.select(
    [cumulative_probabilities > 0.95, cumulative_probabilities > 0.5],
    ["Significantly increased", "Increased"],
    default = "Did not increased",
)

df_student = pd.DataFrame({
    "t_value": student_list,
    "p_value": cumulative_probabilities,
    "intensity_of_occurences": descriptions,
})

df_student

Unnamed: 0,t_value,p_value,intensity_of_occurences
0,0.497217,0.690476,Increased
1,-1.109414,0.133639,Did not increased
2,1.570733,0.941862,Increased
3,-0.583062,0.279933,Did not increased
4,0.245282,0.596878,Increased
...,...,...,...
9995,0.871799,0.808330,Increased
9996,0.520007,0.698465,Increased
9997,0.759468,0.776205,Increased
9998,0.121849,0.548489,Increased


In [14]:
# Saving all the dataframes

# The summary tables go into one Excel workbook, one sheet per dataframe (in this order)
excel_sheets = (df_stats_by_day, df_stats_by_point, df_hypothesis_test, df_student)
save_dataframes_into_excel_multiple_sheets("Temperature_change_(Excel)", *excel_sheets)

# The extreme occurences are written separately as a tab-separated text file
extreme_occurences_path = r"C:\Users\joaoa\Desktop\Ironhack\Labs\Mid_Bootcamp_Project\Temperature_change_(Text).txt"
df_extreme_occurences.to_csv(extreme_occurences_path, sep='\t', index=False)