In [None]:

import torch
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn import preprocessing, metrics
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim
import time
import seaborn as sns
import matplotlib.pyplot as plt


# Record the installed PyTorch version in the notebook output.
print(torch.__version__)


# Data Preprocessing

In [None]:
# Load both datasets and split each one into train / test partitions.
# `pixel_id` is dropped right after reading so it does not end up among the
# model inputs that later cells build from these frames. The drop is chained
# (no `inplace=True`), so each frame is produced by a single expression.
TEST_SIZE = 0.33   # fraction of rows held out for evaluation
SPLIT_SEED = 44    # same seed for both datasets

df_geometric = pd.read_csv('../../bricks_data/dataset_geometric.csv').drop(columns='pixel_id')
df_kitanidis = pd.read_csv('../../bricks_data/dataset_kitanidis.csv').drop(columns='pixel_id')

train_df_geo, test_df_geo = train_test_split(
    df_geometric, test_size=TEST_SIZE, random_state=SPLIT_SEED, shuffle=True
)
train_df_kit, test_df_kit = train_test_split(
    df_kitanidis, test_size=TEST_SIZE, random_state=SPLIT_SEED, shuffle=True
)

In [None]:
# Preview the first rows of the Kitanidis training split.
train_df_kit.head()

In [None]:
# Pairwise correlation between all columns of the full Kitanidis dataset
# (features and targets together), drawn as an un-annotated heatmap.
corrMatrix = df_kitanidis.corr()

# Explicit figure/axes so the plot carries a title and stands on its own.
fig, ax = plt.subplots()
sns.heatmap(corrMatrix, annot=False, ax=ax)
ax.set_title('Kitanidis dataset - correlation matrix')
plt.show()


# Regression

In [None]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import AdaBoostRegressor
# Target columns: each regression cell below fits one separate model per entry.
# NOTE(review): presumably the LRG / ELG / QSO galaxy classes - confirm against
# the dataset description.
galaxy_types = ['lrg','elg','qso']


### Geometric

### Geometric - Linear Regression

In [None]:
# Ordinary least squares on the geometric dataset: one model per target column.
# All three target columns are removed from the inputs. The feature frames do
# not depend on the loop variable, so they are built once up front.
target_columns = ['lrg', 'elg', 'qso']
X_train = train_df_geo.drop(columns=target_columns)
X_test = test_df_geo.drop(columns=target_columns)

for gal in galaxy_types:
    y_train, y_gold = train_df_geo[gal], test_df_geo[gal]

    reg = LinearRegression()
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # Score the predictions on the held-out split.
    r2_value = metrics.r2_score(y_gold, y_pred)
    mse_value = metrics.mean_squared_error(y_gold, y_pred)
    print(f"Linear Regression R^2 for {gal}, Geometric :  {r2_value}.")
    print(f"Linear Regression MSE for {gal}, Geometric :  {mse_value}.")

### Geometric - Ridge Regression

In [None]:
# Ridge regression (default settings) on the geometric dataset, one model per
# target column. The inputs exclude all three targets and are identical for
# every iteration, so they are prepared before the loop.
target_columns = ['lrg', 'elg', 'qso']
X_train = train_df_geo.drop(columns=target_columns)
X_test = test_df_geo.drop(columns=target_columns)

for gal in galaxy_types:
    y_train, y_gold = train_df_geo[gal], test_df_geo[gal]

    reg = Ridge()
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # Score the predictions on the held-out split.
    r2_value = metrics.r2_score(y_gold, y_pred)
    mse_value = metrics.mean_squared_error(y_gold, y_pred)
    print(f"Ridge Regression R^2 for {gal}, Geometric :  {r2_value}.")
    print(f"Ridge Regression MSE for {gal}, Geometric :  {mse_value}.")

### Geometric - Lasso Regression

In [None]:
# Lasso regression (default settings) on the geometric dataset, one model per
# target column. The inputs exclude all three targets and are identical for
# every iteration, so they are prepared before the loop.
target_columns = ['lrg', 'elg', 'qso']
X_train = train_df_geo.drop(columns=target_columns)
X_test = test_df_geo.drop(columns=target_columns)

for gal in galaxy_types:
    y_train, y_gold = train_df_geo[gal], test_df_geo[gal]

    reg = Lasso()
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # Score the predictions on the held-out split.
    r2_value = metrics.r2_score(y_gold, y_pred)
    mse_value = metrics.mean_squared_error(y_gold, y_pred)
    print(f"Lasso Regression R^2 for {gal}, Geometric :  {r2_value}.")
    print(f"Lasso Regression MSE for {gal}, Geometric :  {mse_value}.")


### Geometric - AdaBoost-Regressor

In [None]:
# AdaBoost regression (100 estimators) on the geometric dataset, one model per
# target column. The inputs exclude all three targets and are identical for
# every iteration, so they are prepared before the loop.
target_columns = ['lrg', 'elg', 'qso']
X_train = train_df_geo.drop(columns=target_columns)
X_test = test_df_geo.drop(columns=target_columns)

for gal in galaxy_types:
    y_train, y_gold = train_df_geo[gal], test_df_geo[gal]

    # AdaBoost resamples the training set at random. Without a fixed seed the
    # reported scores change on every run, so the seed is pinned (44 is the
    # value already used for the train/test split).
    reg = AdaBoostRegressor(n_estimators=100, random_state=44)
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # Score the predictions on the held-out split.
    r2_value = metrics.r2_score(y_gold, y_pred)
    mse_value = metrics.mean_squared_error(y_gold, y_pred)
    print(f"AdaBoost Regression R^2 for {gal}, Geometric :  {r2_value}.")
    print(f"AdaBoost Regression MSE for {gal}, Geometric :  {mse_value}.")


### Kitanidis


### Kitanidis - Linear Regression

In [None]:
# Ordinary least squares on the Kitanidis dataset: one model per target column.
# All three target columns are removed from the inputs. The feature frames do
# not depend on the loop variable, so they are built once up front.
target_columns = ['lrg', 'elg', 'qso']
X_train = train_df_kit.drop(columns=target_columns)
X_test = test_df_kit.drop(columns=target_columns)

for gal in galaxy_types:
    y_train, y_gold = train_df_kit[gal], test_df_kit[gal]

    reg = LinearRegression()
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # Score the predictions on the held-out split.
    r2_value = metrics.r2_score(y_gold, y_pred)
    mse_value = metrics.mean_squared_error(y_gold, y_pred)
    print(f"Linear Regression R^2 for {gal}, Kitanidis :  {r2_value}.")
    print(f"Linear Regression MSE for {gal}, Kitanidis :  {mse_value}.")

### Kitanidis - Ridge Regression

In [None]:
# Ridge regression (default settings) on the Kitanidis dataset, one model per
# target column. The inputs exclude all three targets and are identical for
# every iteration, so they are prepared before the loop.
target_columns = ['lrg', 'elg', 'qso']
X_train = train_df_kit.drop(columns=target_columns)
X_test = test_df_kit.drop(columns=target_columns)

for gal in galaxy_types:
    y_train, y_gold = train_df_kit[gal], test_df_kit[gal]

    reg = Ridge()
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # Score the predictions on the held-out split.
    r2_value = metrics.r2_score(y_gold, y_pred)
    mse_value = metrics.mean_squared_error(y_gold, y_pred)
    print(f"Ridge Regression R^2 for {gal}, Kitanidis :  {r2_value}.")
    print(f"Ridge Regression MSE for {gal}, Kitanidis :  {mse_value}.")

### Kitanidis - Lasso Regression

In [None]:
# Lasso regression (default settings) on the Kitanidis dataset, one model per
# target column. The inputs exclude all three targets and are identical for
# every iteration, so they are prepared before the loop.
target_columns = ['lrg', 'elg', 'qso']
X_train = train_df_kit.drop(columns=target_columns)
X_test = test_df_kit.drop(columns=target_columns)

for gal in galaxy_types:
    y_train, y_gold = train_df_kit[gal], test_df_kit[gal]

    reg = Lasso()
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # Score the predictions on the held-out split.
    r2_value = metrics.r2_score(y_gold, y_pred)
    mse_value = metrics.mean_squared_error(y_gold, y_pred)
    print(f"Lasso Regression R^2 for {gal}, Kitanidis :  {r2_value}.")
    print(f"Lasso Regression MSE for {gal}, Kitanidis :  {mse_value}.")


### Kitanidis - AdaBoost-Regressor

In [None]:
# AdaBoost regression (100 estimators) on the Kitanidis dataset, one model per
# target column. The inputs exclude all three targets and are identical for
# every iteration, so they are prepared before the loop.
target_columns = ['lrg', 'elg', 'qso']
X_train = train_df_kit.drop(columns=target_columns)
X_test = test_df_kit.drop(columns=target_columns)

for gal in galaxy_types:
    y_train, y_gold = train_df_kit[gal], test_df_kit[gal]

    # AdaBoost resamples the training set at random. Without a fixed seed the
    # reported scores change on every run, so the seed is pinned (44 is the
    # value already used for the train/test split).
    reg = AdaBoostRegressor(n_estimators=100, random_state=44)
    reg.fit(X_train, y_train)
    y_pred = reg.predict(X_test)

    # Score the predictions on the held-out split.
    r2_value = metrics.r2_score(y_gold, y_pred)
    mse_value = metrics.mean_squared_error(y_gold, y_pred)
    print(f"AdaBoost Regression R^2 for {gal}, Kitanidis :  {r2_value}.")
    print(f"AdaBoost Regression MSE for {gal}, Kitanidis :  {mse_value}.")



