In [10]:
%matplotlib inline

# Single seed value reused by the seeding calls below.
seed_value = 100
import os
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# setting it here most likely affects only child processes, not this running
# kernel — confirm if hash-order reproducibility matters.
os.environ['PYTHONHASHSEED']=str(seed_value)
import random
# Seeds Python's built-in `random` module only; no NumPy seeding is visible in this cell.
random.seed(seed_value)

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go
import seaborn as sns

from sklearn.linear_model import LinearRegression
from sklearn import metrics
import statsmodels.api as sm
from scipy import stats

from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# Data loading

In [18]:
# ICAO code of the airport being analysed; swap the active line to change airport.
AIRPORT_ICAO = "ESGG"
#AIRPORT_ICAO = "ESSA"

# Weather files for the airport live under ../../Data/Weather/<ICAO>.
DATA_DIR = os.path.join("..", "..", "Data", "Weather", AIRPORT_ICAO)

# AIF
# The four input files differ only by the period tag in their name, so they are
# read with one loop instead of four copies of the same three statements.
period_tags = ["2019_01_06", "2019_07_12", "2020_01_06", "2020_07_12"]

weather_frames = []
for period_tag in period_tags:
    filename = AIRPORT_ICAO + "_" + period_tag + "_mean_by_lat_lon.csv"
    full_filename = os.path.join(DATA_DIR, filename)
    # The files are space-separated rather than comma-separated.
    weather_frames.append(pd.read_csv(full_filename, sep=' '))

# Keep the per-file names available in case later cells refer to them.
weather_df1, weather_df2, weather_df3, weather_df4 = weather_frames

# Stack the files row-wise; ignore_index=True yields a fresh 0..n-1 index,
# replacing the separate in-place reset_index call.
weather_df = pd.concat(weather_frames, axis=0, ignore_index=True)

# Do not truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Columns excluded from the feature set; dropped in a single call that returns
# a new frame and leaves weather_df untouched.
excluded_columns = ['month', 'day', 'hour', 'u10', 'v10', 'u100', 'v100', 'cin']
features_df = weather_df.drop(columns=excluded_columns)

# Fill gaps in 'cbh' by carrying the last valid value forward.  A repeated
# rolling-mean fill (window=2, min_periods=1) produces the same result one row
# per pass, but loops forever when the column starts with NaN because the
# first row has no predecessor to copy from; ffill does it in one bounded step.
cbh_filled = features_df['cbh'].ffill()
# Rows before the first valid observation have nothing to carry forward, so
# they take the first valid value instead.
cbh_filled = cbh_filled.bfill()
# If NaNs remain, the column had no valid value at all; fail loudly instead of
# passing an all-NaN feature to the scalers.
if cbh_filled.isnull().any():
    raise ValueError("'cbh' has no valid values to fill missing entries from")
features_df['cbh'] = cbh_filled

features = features_df.columns
print(features)

# Separating out the features (plain 2-D array, columns in the order of `features`)
data = features_df.loc[:, features].values

# Normalizing the features to the [0, 1] range
data_rescaled = MinMaxScaler().fit_transform(data)

# Flip the min-max-scaled 'cbh' so that low raw values become high scaled
# values.  The column is located by name rather than assumed to be at
# position 0, so a change in column order cannot invert the wrong feature.
cbh_position = features.get_loc('cbh')
data_rescaled[:, cbh_position] = 1 - data_rescaled[:, cbh_position]

# Standardizing the features (zero mean, unit variance per column)
data_rescaled = StandardScaler().fit_transform(data_rescaled)

# Column labels of this frame are positional (0..n-1), in the order of `features`.
features_rescaled = pd.DataFrame(data_rescaled)

Index(['cbh', 'cape', 'cp', 'csf', 'csfr', 'hcc', 'i10fg', 'kx', 'lsf',
       'lssfr', 'lcc', 'mcc', 'sf', 'tcc', 'tciw', 'tclw', 'tcrw', 'tcsw',
       'tcw', 'tp', 'wind10', 'wind100'],
      dtype='object')


In [19]:
# Pairwise correlation between the rescaled features (DataFrame.corr default method).
corr = features_rescaled.corr()
# Colour the matrix and show two decimals.  Styler.set_precision was removed in
# pandas 2.0; a plain format string works on both old and new pandas versions.
corr.style.background_gradient(cmap='coolwarm').format("{:.2f}")

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
0,1.0,0.04,0.17,0.05,0.04,0.03,0.23,0.26,0.08,0.08,0.56,0.3,0.09,0.3,0.2,0.37,0.13,0.15,0.09,0.23,0.24,0.23
1,0.04,1.0,0.34,-0.02,-0.02,0.03,-0.05,0.29,-0.04,-0.04,-0.07,0.01,-0.04,0.02,0.08,0.0,0.03,0.02,0.36,0.15,-0.08,-0.09
2,0.17,0.34,1.0,0.12,0.09,0.15,0.18,0.38,-0.0,0.0,0.15,0.29,0.02,0.21,0.32,0.28,0.34,0.24,0.37,0.68,0.15,0.13
3,0.05,-0.02,0.12,1.0,0.73,0.05,0.05,0.04,0.28,0.29,0.1,0.14,0.47,0.09,0.12,0.04,0.0,0.12,-0.05,0.15,0.05,0.05
4,0.04,-0.02,0.09,0.73,1.0,0.04,0.03,0.03,0.23,0.24,0.08,0.12,0.36,0.08,0.1,0.03,-0.0,0.09,-0.05,0.11,0.04,0.04
5,0.03,0.03,0.15,0.05,0.04,1.0,0.12,0.17,0.12,0.13,0.33,0.53,0.12,0.73,0.57,0.31,0.27,0.4,0.22,0.31,0.13,0.15
6,0.23,-0.05,0.18,0.05,0.03,0.12,1.0,0.06,0.12,0.11,0.2,0.22,0.12,0.16,0.22,0.26,0.19,0.22,-0.03,0.29,0.98,0.95
7,0.26,0.29,0.38,0.04,0.03,0.17,0.06,1.0,0.05,0.05,0.28,0.43,0.06,0.31,0.29,0.39,0.27,0.23,0.73,0.38,0.05,0.04
8,0.08,-0.04,-0.0,0.28,0.23,0.12,0.12,0.05,1.0,0.97,0.19,0.23,0.98,0.13,0.29,0.06,0.0,0.3,-0.06,0.25,0.12,0.12
9,0.08,-0.04,0.0,0.29,0.24,0.13,0.11,0.05,0.97,1.0,0.19,0.24,0.96,0.13,0.31,0.07,0.0,0.33,-0.06,0.26,0.12,0.12
