In [9]:
import numpy as np
import pandas as pd
import seaborn as sns
import re
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import optuna
import xgboost as xgb

from sklearn.metrics import r2_score
from scipy.stats import spearmanr
from scipy import stats

In [10]:
# Load the raw measurements; the file name is resolved relative to the
# current working directory. The trailing bare expression renders the
# frame as this cell's output.
df_data = pd.read_csv(filepath_or_buffer='SolarPrediction.csv')
df_data

Unnamed: 0,UNIXTime,Data,Time,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed,TimeSunRise,TimeSunSet
0,1475229326,9/29/2016 12:00:00 AM,23:55:26,1.21,48,30.46,59,177.39,5.62,06:13:00,18:13:00
1,1475229023,9/29/2016 12:00:00 AM,23:50:23,1.21,48,30.46,58,176.78,3.37,06:13:00,18:13:00
2,1475228726,9/29/2016 12:00:00 AM,23:45:26,1.23,48,30.46,57,158.75,3.37,06:13:00,18:13:00
3,1475228421,9/29/2016 12:00:00 AM,23:40:21,1.21,48,30.46,60,137.71,3.37,06:13:00,18:13:00
4,1475228124,9/29/2016 12:00:00 AM,23:35:24,1.17,48,30.46,62,104.95,5.62,06:13:00,18:13:00
...,...,...,...,...,...,...,...,...,...,...,...
32681,1480587604,12/1/2016 12:00:00 AM,00:20:04,1.22,44,30.43,102,145.42,6.75,06:41:00,17:42:00
32682,1480587301,12/1/2016 12:00:00 AM,00:15:01,1.17,44,30.42,102,117.78,6.75,06:41:00,17:42:00
32683,1480587001,12/1/2016 12:00:00 AM,00:10:01,1.20,44,30.42,102,145.19,9.00,06:41:00,17:42:00
32684,1480586702,12/1/2016 12:00:00 AM,00:05:02,1.23,44,30.42,101,164.19,7.87,06:41:00,17:42:00


In [11]:
# Derive integer calendar/clock components from the raw text columns.
# Each spec is (new column, source column, regex whose whole match is the
# run of digits to keep). Column names are kept exactly as-is -- including
# 'riseminuter' -- because later cells read them by name.
_COMPONENT_SPECS = [
    ('month', 'Data', r'^\d+'),
    ('date', 'Data', r'(?<=\/)\d+(?=\/)'),
    ('year', 'Data', r'(?<=\/)\d+(?=\s)'),
    ('hour', 'Time', r'^\d+'),
    ('minute', 'Time', r'(?<=\:)\d+(?=\:)'),
    ('second', 'Time', r'\d+$'),
    ('riseminuter', 'TimeSunRise', r'(?<=\:)\d+(?=\:)'),
    ('setminute', 'TimeSunSet', r'(?<=\:)\d+(?=\:)'),
]


def _matched_text(text, pattern):
    """Return the text of the first match of ``pattern`` in ``text``.

    Raises AttributeError when there is no match, because ``re.search``
    then returns None.
    """
    return re.search(pattern, text).group(0)


for _new_col, _source_col, _pattern in _COMPONENT_SPECS:
    _digits = df_data[_source_col].apply(_matched_text, args=(_pattern,))
    df_data[_new_col] = _digits.astype(int)

df_data.head()

Unnamed: 0,UNIXTime,Data,Time,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed,TimeSunRise,TimeSunSet,month,date,year,hour,minute,second,riseminuter,setminute
0,1475229326,9/29/2016 12:00:00 AM,23:55:26,1.21,48,30.46,59,177.39,5.62,06:13:00,18:13:00,9,29,2016,23,55,26,13,13
1,1475229023,9/29/2016 12:00:00 AM,23:50:23,1.21,48,30.46,58,176.78,3.37,06:13:00,18:13:00,9,29,2016,23,50,23,13,13
2,1475228726,9/29/2016 12:00:00 AM,23:45:26,1.23,48,30.46,57,158.75,3.37,06:13:00,18:13:00,9,29,2016,23,45,26,13,13
3,1475228421,9/29/2016 12:00:00 AM,23:40:21,1.21,48,30.46,60,137.71,3.37,06:13:00,18:13:00,9,29,2016,23,40,21,13,13
4,1475228124,9/29/2016 12:00:00 AM,23:35:24,1.17,48,30.46,62,104.95,5.62,06:13:00,18:13:00,9,29,2016,23,35,24,13,13


In [12]:
# Spearman rank correlation of Radiation with the raw UNIX timestamp.
print(spearmanr(df_data['Radiation'], df_data['UNIXTime']))

SpearmanrResult(correlation=-0.15358422210627404, pvalue=1.0997785760929166e-171)


In [13]:
# Spearman rank correlation of Radiation with the Temperature column.
print(spearmanr(df_data['Radiation'], df_data['Temperature']))

SpearmanrResult(correlation=0.7177460645182121, pvalue=0.0)


In [14]:
# Spearman rank correlation of Radiation with the Pressure column.
print(spearmanr(df_data['Radiation'], df_data['Pressure']))

SpearmanrResult(correlation=0.04557748677200884, pvalue=1.6648355187356994e-16)


In [15]:
# Spearman rank correlation of Radiation with the Humidity column.
print(spearmanr(df_data['Radiation'], df_data['Humidity']))

SpearmanrResult(correlation=-0.11757072021546888, pvalue=6.094105571339439e-101)


In [16]:
# Spearman rank correlation of Radiation with the WindDirection(Degrees) column.
print(spearmanr(df_data['Radiation'], df_data['WindDirection(Degrees)']))

SpearmanrResult(correlation=-0.31484259721681823, pvalue=0.0)


In [17]:
# Spearman rank correlation of Radiation with the Speed column.
print(spearmanr(df_data['Radiation'], df_data['Speed']))

SpearmanrResult(correlation=-0.02106594474305596, pvalue=0.00013961688951190135)


In [18]:
# Spearman rank correlation of Radiation with the month number parsed from `Data`.
print(spearmanr(df_data['Radiation'], df_data['month']))

SpearmanrResult(correlation=-0.13929524340462407, pvalue=2.739722002566359e-141)


In [19]:
# Spearman rank correlation of Radiation with the hour parsed from `Time`.
print(spearmanr(df_data['Radiation'], df_data['hour']))

SpearmanrResult(correlation=0.042537829545345816, pvalue=1.4292084310713007e-14)


In [20]:
# Spearman rank correlation of Radiation with the minute field parsed from `TimeSunRise`.
print(spearmanr(df_data['Radiation'], df_data['riseminuter']))

SpearmanrResult(correlation=-0.15399759851612338, pvalue=1.305832398511935e-172)


In [21]:
# Spearman rank correlation of Radiation with the minute field parsed from `TimeSunSet`.
print(spearmanr(df_data['Radiation'], df_data['setminute']))

SpearmanrResult(correlation=-0.09095693452014035, pvalue=5.290384668828758e-61)


In [None]:
# Scatter Radiation against hour in a single panel, coloured by hour.
#
# The previous version passed col='Radiation', which asks FacetGrid for one
# subplot column per distinct Radiation value. Radiation is a float reading
# with many distinct values, so that is not a usable figure. Dropping `col`
# keeps one panel while preserving the hue grouping and the original x/y
# assignment.
grid = sns.FacetGrid(data=df_data, hue='hour', height=6, aspect=1.5)
grid.map(sns.scatterplot, 'Radiation', 'hour')
grid.set_axis_labels('Radiation', 'hour')
# Trailing semicolon suppresses the FacetGrid repr in the cell output.
grid.add_legend(title='hour');