In [21]:
### importing required resources
import pandas as pd
import numpy as np
from sympy import S, symbols, printing
from matplotlib import pyplot as plt
import datetime
import seaborn as sns

In [22]:
### specifying file location
file = 'C:/Users/jaska/Desktop/temp_londes.xlsx'

In [23]:
### reading the file into dataframe
londestemp = pd.read_excel(file)

In [24]:
### getting info about dataframe size and type
londestemp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 245005 entries, 0 to 245004
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   date    245005 non-null  datetime64[ns]
 1   Temp    207115 non-null  float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 3.7 MB


In [7]:
### dropping all rows with missing values and rewriting the dataframe
londestemp.dropna(axis = 0, how = 'any', inplace=True) #axis=0 means rows are dropped, 1 means columns;
# how=any means rows where any column value is na is dropped, all would mean drop only rows where all columns are na;
# inplace=True means that the dataframe is rewritten
londestemp.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 207115 entries, 0 to 245004
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   date    207115 non-null  datetime64[ns]
 1   Temp    207115 non-null  float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 4.7 MB


In [25]:
### matching solar radiation data (excluding soil temperature values - 2016 and onwards)
londesnew = londestemp.loc[londestemp['date'].dt.year < 2016]

In [26]:
londesnew.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 148832 entries, 0 to 148831
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   date    148832 non-null  datetime64[ns]
 1   Temp    125406 non-null  float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 3.4 MB


In [27]:
londesnew.head()

Unnamed: 0,date,Temp
0,2011-10-03 16:00:00,11.783
1,2011-10-03 16:15:00,11.565
2,2011-10-03 16:30:00,11.662
3,2011-10-03 16:45:00,11.467
4,2011-10-03 17:00:00,12.05


In [28]:
### finding daily averages

### setting date as index in the new data set
londesnew.set_index('date')

### resampling data so that we have daily averages, max and min of the temperatures
daily_mean = londesnew.resample('D', on='date').mean()
daily_max = londesnew.resample('D', on='date').max()
daily_min = londesnew.resample('D', on='date').min()

In [29]:
daily_min.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1551 entries, 2011-10-03 to 2015-12-31
Freq: D
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    1551 non-null   datetime64[ns]
 1   Temp    1312 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 36.4 KB


In [30]:
daily_mean.head()
daily_max.head()
daily_min.head()

Unnamed: 0_level_0,date,Temp
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2011-10-03,2011-10-03 16:00:00,9.534
2011-10-04,2011-10-04 00:00:00,6.687
2011-10-05,2011-10-05 00:00:00,5.668
2011-10-06,2011-10-06 00:00:00,6.712
2011-10-07,2011-10-07 00:00:00,7.795


In [31]:
### renaming
mean_r = daily_mean.rename(columns={'Temp' : 'mean'})

In [32]:
max_r = daily_max.rename(columns={'Temp' : 'max'})
min_r = daily_min.rename(columns={'Temp' : 'min'})

In [33]:
min_r.drop(['date'], inplace = True, axis = 1)

In [34]:
max_r.drop(['date'], inplace = True, axis = 1)

In [35]:
combined = pd.concat([mean_r, max_r, min_r], axis=1)

In [36]:
combined.head()

Unnamed: 0_level_0,mean,max,min
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2011-10-03,11.068875,12.195,9.534
2011-10-04,12.507812,19.389,6.687
2011-10-05,13.469906,21.963,5.668
2011-10-06,13.698354,21.413,6.712
2011-10-07,16.033823,24.339,7.795


In [37]:
combined.to_excel(r'C:/Users/jaska/Desktop/daily_temps2.xlsx')