# Cloudy With A Chance of Burst!☁️💥
## Neural Network Classifier

In [6]:
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder, StandardScaler
from torch.utils.data import DataLoader, Dataset


In [8]:
# Load the raw observations and drop the columns this model does not use.
dataset = pd.read_csv('cloudpredictionsystemproject.csv')
dataset = dataset.drop(['Date', 'Evaporation', 'Sunshine', 'CloudBurstTomorrow'], axis=1)

# Fill continuous-column NAs with the column mean, rounded to 2 decimals.
numeric_columns = dataset.select_dtypes(include=np.number).columns
column_means = dataset[numeric_columns].mean().round(2)
dataset[numeric_columns] = dataset[numeric_columns].fillna(column_means)

# Fill categorical-column NAs with forward fill. DataFrame.ffill() is used
# because fillna(method='ffill') is deprecated in recent pandas releases.
non_numeric_columns = dataset.select_dtypes(exclude=np.number).columns
dataset[non_numeric_columns] = dataset[non_numeric_columns].ffill()

dataset

Unnamed: 0,Location,MinimumTemperature,MaximumTemperature,Rainfall,WindGustDirection,WindGustSpeed,WindDirection9am,WindDirection3pm,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temperature9am,Temperature3pm,CloudBurst Today
0,Albury,13.4,22.90,0.6,W,44.00,W,WNW,20.0,24.0,71.0,22.0,1007.7,1007.1,8.00,4.51,16.9,21.8,No
1,Albury,7.4,25.10,0.0,WNW,44.00,NNW,WSW,4.0,22.0,44.0,25.0,1010.6,1007.8,4.45,4.51,17.2,24.3,No
2,Albury,12.9,25.70,0.0,WSW,46.00,W,WSW,19.0,26.0,38.0,30.0,1007.6,1008.7,4.45,2.00,21.0,23.2,No
3,Albury,9.2,28.00,0.0,NE,24.00,SE,E,11.0,9.0,45.0,16.0,1017.6,1012.8,4.45,4.51,18.1,26.5,No
4,Albury,17.5,32.30,1.0,W,41.00,ENE,NW,7.0,20.0,82.0,33.0,1010.8,1006.0,7.00,8.00,17.8,29.7,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145455,Uluru,2.8,23.40,0.0,E,31.00,SE,ENE,13.0,11.0,51.0,24.0,1024.6,1020.3,4.45,4.51,10.1,22.4,No
145456,Uluru,3.6,25.30,0.0,NNW,22.00,SE,N,13.0,9.0,56.0,21.0,1023.5,1019.1,4.45,4.51,10.9,24.5,No
145457,Uluru,5.4,26.90,0.0,N,37.00,SE,WNW,9.0,9.0,53.0,24.0,1021.0,1016.8,4.45,4.51,12.5,26.1,No
145458,Uluru,7.8,27.00,0.0,SE,28.00,SSE,N,13.0,7.0,51.0,24.0,1019.4,1016.5,3.00,2.00,15.1,26.0,No


### Label Encoding

In [9]:
# A single encoder is refit on each string column in turn, so after the loop
# `le` only holds the class mapping of the last column it was fitted on.
le = LabelEncoder()

for column in dataset.columns:
    # Leave columns that are already numeric untouched.
    if dataset[column].dtype != 'object':
        continue
    dataset[column] = le.fit_transform(dataset[column])
dataset

Unnamed: 0,Location,MinimumTemperature,MaximumTemperature,Rainfall,WindGustDirection,WindGustSpeed,WindDirection9am,WindDirection3pm,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temperature9am,Temperature3pm,CloudBurst Today
0,2,13.4,22.90,0.6,13,44.00,13,14,20.0,24.0,71.0,22.0,1007.7,1007.1,8.00,4.51,16.9,21.8,0
1,2,7.4,25.10,0.0,14,44.00,6,15,4.0,22.0,44.0,25.0,1010.6,1007.8,4.45,4.51,17.2,24.3,0
2,2,12.9,25.70,0.0,15,46.00,13,15,19.0,26.0,38.0,30.0,1007.6,1008.7,4.45,2.00,21.0,23.2,0
3,2,9.2,28.00,0.0,4,24.00,9,0,11.0,9.0,45.0,16.0,1017.6,1012.8,4.45,4.51,18.1,26.5,0
4,2,17.5,32.30,1.0,13,41.00,1,7,7.0,20.0,82.0,33.0,1010.8,1006.0,7.00,8.00,17.8,29.7,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145455,41,2.8,23.40,0.0,0,31.00,9,1,13.0,11.0,51.0,24.0,1024.6,1020.3,4.45,4.51,10.1,22.4,0
145456,41,3.6,25.30,0.0,6,22.00,9,3,13.0,9.0,56.0,21.0,1023.5,1019.1,4.45,4.51,10.9,24.5,0
145457,41,5.4,26.90,0.0,3,37.00,9,14,9.0,9.0,53.0,24.0,1021.0,1016.8,4.45,4.51,12.5,26.1,0
145458,41,7.8,27.00,0.0,9,28.00,10,3,13.0,7.0,51.0,24.0,1019.4,1016.5,3.00,2.00,15.1,26.0,0


In [11]:
class CSV_Train(Dataset):
    """Training split of an all-numeric DataFrame, exposed as a torch Dataset.

    The first ``train_frac`` share of the rows is used. Every column except
    the last is treated as a feature and standardized; the last column is
    the label.

    Args:
        df: DataFrame whose columns are all numeric (already label-encoded),
            with the target in the last column.
        train_frac: Fraction of rows, counted from the top, to keep.

    Attributes:
        x_train: float32 tensor of standardized features.
        y_train: float32 tensor of labels (cast with ``.long()`` if the loss
            expects class indices).
        scaler: The StandardScaler fitted on the training features, kept so
            held-out rows can be transformed with the same statistics.
    """

    def __init__(self, df, train_frac):
        # Use the DataFrame that was passed in; the original read an
        # unassigned local named `dataset`, which raised UnboundLocalError.
        data = df.to_numpy()
        train_size = int(len(data) * train_frac)

        self.scaler = StandardScaler()
        features = self.scaler.fit_transform(data[:train_size, :-1])
        labels = data[:train_size, -1]

        self.x_train = torch.tensor(features, dtype=torch.float32)
        # Labels come from the last column, not from the feature matrix.
        self.y_train = torch.tensor(labels, dtype=torch.float32)

    def __len__(self):
        """Return the number of training rows."""
        return len(self.x_train)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.x_train[idx], self.y_train[idx]

In [None]:
# Value passed as the second positional argument to DataLoader below.
size = 64
# NOTE(review): unfinished cell - the first DataLoader argument (the Dataset
# to wrap) is missing, so this line is a syntax error as written.
train_dataloader =  DataLoader(, size)
# NOTE(review): DataLoader is called with no arguments here and the result is
# named test_dataset; presumably a held-out loader was intended - confirm.
test_dataset = DataLoader ()

In [None]:
class CSV_Dataset(Dataset):
  """Dataset that reads a CSV file and imputes its missing values.

  NOTE(review): in the lines visible here, ``__init__`` computes ``x`` and
  ``y`` only as local variables and stores nothing on ``self``; confirm how
  the rest of the class is meant to access them.
  """
  def __init__(self, file_name):
    # file_name is handed straight to pd.read_csv.
    csv = pd.read_csv(file_name)
    # Drop the same four columns as the notebook-level preprocessing cell.
    csv = csv.drop(['Date', 'Evaporation', 'Sunshine', 'CloudBurstTomorrow'], axis=1)
    # NOTE(review): whether select_dtypes accepts 'str' depends on the pandas
    # version (older releases reject string selectors) - confirm, or use 'object'.
    catcol = csv.select_dtypes(include='str').columns
    # NOTE(review): the loop variable `i` is never used, so the body below runs
    # unchanged once per selected column and not at all when catcol is empty.
    for i in catcol:
        # Numeric columns: replace NAs with the column mean rounded to 2 decimals.
        numeric_columns = csv.select_dtypes(include=np.number).columns
        csv[numeric_columns] = csv[numeric_columns].fillna(round(csv[numeric_columns].mean(),2))
        # Non-numeric columns: replace NAs by forward fill.
        non_numeric_columns = csv.select_dtypes(exclude=np.number).columns
        csv[non_numeric_columns] = csv[non_numeric_columns].fillna(method='ffill')
        # x: every column except the last; y: the last column, kept 2-D by the
        # `-1:` slice.
        x = csv.iloc[:, :-1].values
        y = csv.iloc[:, -1:].values