Skip to content
Binary file modified __pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file added images/res.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified q01_load_data/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q01_load_data/__pycache__/build.cpython-36.pyc
Binary file not shown.
9 changes: 7 additions & 2 deletions q01_load_data/build.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
# %load q01_load_data/build.py
import pandas as pd

# Write your code below

def load_data(path, delimiter=';'):
    """Read a delimited text file into a pandas DataFrame.

    Args:
        path: File path or file-like object forwarded to ``pd.read_csv``.
        delimiter: Field separator. Defaults to ``';'``, the value that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    return pd.read_csv(filepath_or_buffer=path, delimiter=delimiter)



Binary file modified q01_load_data/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q01_load_data/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q02_data_split/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q02_data_split/__pycache__/build.cpython-36.pyc
Binary file not shown.
15 changes: 12 additions & 3 deletions q02_data_split/build.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
# %load q02_data_split/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from sklearn.model_selection import train_test_split
import pandas as pd
# Load data/student-mat.csv at import time; `df` is a module-level global.
df = load_data('data/student-mat.csv')

# Write your code below


def split_data(df1, train_size=0.7, test_size=0.3, random_state=None):
    """Split a frame into train/test features and target.

    The last column of ``df1`` is treated as the target; every other column
    is a feature.

    Args:
        df1: DataFrame whose final column is the target.
        train_size: Forwarded to ``train_test_split``. Defaults to the
            previously hard-coded ``0.7``.
        test_size: Forwarded to ``train_test_split``. Defaults to the
            previously hard-coded ``0.3``.
        random_state: Forwarded to ``train_test_split``; pass an int to get a
            reproducible split. ``None`` keeps the original unseeded
            behaviour.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    # Slice by position for both halves so that a duplicated column label
    # cannot cause more than the final column to be removed from X.
    X = df1.iloc[:, :-1]
    y = df1.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        train_size=train_size,
        test_size=test_size,
        random_state=random_state,
    )
    return X_train, X_test, y_train, y_test





Binary file modified q02_data_split/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q02_data_split/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q03_data_encoding/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q03_data_encoding/__pycache__/build.cpython-36.pyc
Binary file not shown.
15 changes: 12 additions & 3 deletions q03_data_encoding/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q03_data_encoding/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from sklearn.preprocessing import LabelEncoder
Expand All @@ -7,8 +8,16 @@

# Runs at import time: the four values returned by split_dataset(df) become
# module-level globals.
x_train, x_test, y_train, y_test = split_dataset(df)

# Write your code below


def label_encode(X_train, X_test):
    """Label-encode the non-numeric columns of the train and test frames.

    A column is treated as categorical when its dtype in ``X_train`` is
    neither ``int64`` nor ``float64``. The column list is derived from the
    ``X_train`` argument rather than from a module-level frame, so the
    function works on whatever data it is given.

    Each categorical column gets its own ``LabelEncoder`` that is fitted on
    the training values and then applied to the test values, so a category is
    mapped to the same integer in both outputs. Re-fitting on the test column
    could assign different codes to the same category.

    Args:
        X_train: Training feature DataFrame.
        X_test: Test feature DataFrame with the same categorical columns.

    Returns:
        Tuple ``(X_transform, X_test_transform)`` of encoded copies; the
        inputs are not modified.

    Raises:
        ValueError: Raised by ``LabelEncoder.transform`` if a test column
            contains a category that never occurs in the training column.
    """
    X_transform = X_train.copy()
    X_test_transform = X_test.copy()
    cat_features = [
        name
        for name, dtype in X_train.dtypes.items()
        if dtype not in ('int64', 'float64')
    ]
    for feature in cat_features:
        # One encoder per column: fit on train, reuse the mapping on test.
        encoder = LabelEncoder()
        X_transform[feature] = encoder.fit_transform(X_train[feature])
        X_test_transform[feature] = encoder.transform(X_test[feature])
    return X_transform, X_test_transform



Binary file modified q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q03_data_encoding/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q03_ohe_encoder/__pycache__/build.cpython-36.pyc
Binary file not shown.
12 changes: 8 additions & 4 deletions q03_ohe_encoder/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q03_ohe_encoder/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from sklearn.preprocessing import OneHotEncoder
Expand All @@ -11,9 +12,12 @@
# Positional indices of the columns of `df` whose dtype is object.
category_index = [x for x in range(len(df.columns)) if df[df.columns[x]].dtype == 'object']


# Write your code below
def ohe_encode(X, X_test, category_index=category_index):
    """One-hot encode the categorical columns of the train and test frames.

    Args:
        X: Training feature DataFrame.
        X_test: Test feature DataFrame.
        category_index: Positional indices of the columns to encode.
            Defaults to the module-level ``category_index``.

    Returns:
        Tuple ``(X_transform, X_test_transform)`` holding the dummy-encoded
        categorical columns (first level of each column dropped).
    """
    # ``iloc[:, idx]`` selects columns; the previous ``iloc[idx]`` selected
    # rows by position, which is not what a list of column indices means.
    X_transform = pd.get_dummies(X.iloc[:, category_index], drop_first=True)
    # NOTE(review): train and test are encoded independently, so their dummy
    # columns can differ when a category is missing from one of them.
    X_test_transform = pd.get_dummies(
        X_test.iloc[:, category_index], drop_first=True
    )
    return X_transform, X_test_transform








Binary file modified q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q04_data_visualisation/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q04_data_visualisation/__pycache__/build.cpython-36.pyc
Binary file not shown.
16 changes: 10 additions & 6 deletions q04_data_visualisation/build.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
# -*- coding: utf-8 -*-
# %load q04_data_visualisation/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode

from greyatomlib.multivariate_regression_project.q04_data_visualisation.build import visualise_data
import matplotlib.pyplot as plt
import seaborn as sns
from pandas.plotting import scatter_matrix
# Module-level pipeline, executed at import time: load the CSV, split it,
# pass the two feature frames through label_encode, then join y_train onto
# x_train to form df_train.
data = load_data('data/student-mat.csv')
x_train, x_test, y_train, y_test = split_dataset(data)
x_train,x_test = label_encode(x_train,x_test)
df_train = x_train.join(y_train)
def visualize_data(df_train, path):
    """Draw and display a scatter matrix of ``df_train``.

    Args:
        df_train: DataFrame passed to ``scatter_matrix``.
        path: Accepted for interface compatibility; this implementation does
            not read it.

    Returns:
        The object returned by ``scatter_matrix``.
    """
    axes_grid = scatter_matrix(df_train)
    plt.show()
    return axes_grid



# Write your code below




Binary file modified q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q05_linear_regression_model/__pycache__/build.cpython-36.pyc
Binary file not shown.
9 changes: 7 additions & 2 deletions q05_linear_regression_model/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q05_linear_regression_model/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset

Expand All @@ -10,6 +11,10 @@

# Rebind the module-level feature frames to the output of label_encode.
x_train, x_test = label_encode(x_train,x_test)

def linear_regression(X,y):
    """Fit a default ``LinearRegression`` on ``X`` and ``y``.

    Returns:
        The value returned by ``LinearRegression().fit(X, y)``.
    """
    return LinearRegression().fit(X, y)



# Write your code below

Binary file not shown.
Binary file modified q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q06_cross_validation/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q06_cross_validation/__pycache__/build.cpython-36.pyc
Binary file not shown.
13 changes: 10 additions & 3 deletions q06_cross_validation/build.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
# %load q06_cross_validation/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset

from greyatomlib.multivariate_regression_project.q05_linear_regression_model.build import linear_regression

from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode

from greyatomlib.multivariate_regression_project.q06_cross_validation.build import cross_validation_regressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
import numpy as np
Expand All @@ -17,5 +18,11 @@

# Module-level model built from the module-level training data at import time.
model =linear_regression(x_train,y_train)

# Write your code below

def cross_validation(model, X, y):
    """Return the mean cross-validation score of ``model`` on ``X``/``y``.

    The previous version first called ``model.fit`` on the module-level
    ``x_train``/``y_train``. That ignored the ``X``/``y`` arguments, mutated
    the caller's estimator, and had no effect on the scores (see the
    scikit-learn ``cross_val_score`` documentation on estimator cloning).
    That call has been removed.

    Args:
        model: Estimator accepted by ``cross_val_score``.
        X: Feature data to cross-validate on.
        y: Target values aligned with ``X``.

    Returns:
        ``np.mean`` of the per-fold scores, using ``cross_val_score``'s
        default ``cv`` and ``scoring``.
    """
    scores = cross_val_score(model, X, y)
    return np.mean(scores)



Binary file modified q06_cross_validation/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q06_cross_validation/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q07_regression_pred/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q07_regression_pred/__pycache__/build.cpython-36.pyc
Binary file not shown.
15 changes: 13 additions & 2 deletions q07_regression_pred/build.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# %load q07_regression_pred/build.py

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

from sklearn.linear_model import LinearRegression
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset

Expand All @@ -19,4 +20,14 @@
# Computed at import time from the module-level model and training data.
val = cross_validation_regressor(model,x_train,y_train)


# Write your code below
def regression_predictor(model, X, y):
    """Predict with a fitted model and compute regression metrics.

    The previous version refitted ``model`` on the module-level
    ``x_train``/``y_train`` before predicting, which silently replaced
    whatever the caller had trained. The model is now used exactly as passed
    in and must already be fitted.

    Args:
        model: Fitted estimator exposing ``predict``.
        X: Feature data to predict on.
        y: True target values aligned with ``X``.

    Returns:
        Tuple ``(y_pred, mse, mae, r2)``: the predictions followed by mean
        squared error, mean absolute error and R^2 of ``y`` vs ``y_pred``.
    """
    y_pred = model.predict(X)
    mse = mean_squared_error(y, y_pred)
    mae = mean_absolute_error(y, y_pred)
    r2 = r2_score(y, y_pred)
    return y_pred, mse, mae, r2




Binary file modified q07_regression_pred/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q07_regression_pred/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q08_linear_model/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q08_linear_model/__pycache__/build.cpython-36.pyc
Binary file not shown.
16 changes: 12 additions & 4 deletions q08_linear_model/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q08_linear_model/build.py
import pandas as pd
import numpy as np
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
Expand All @@ -11,10 +12,17 @@
# Module-level pipeline, executed at import time: load, split, encode, fit,
# cross-validate and score. The results are module-level globals.
df = load_data('data/student-mat.csv')
x_train, x_test, y_train, y_test = split_dataset(df)
x_train,x_test = label_encode(x_train,x_test)
# NOTE(review): the next two assignments are identical apart from spacing;
# this looks like a before/after pair from a diff view - confirm and drop one.
model =linear_regression(x_train,y_train)
model = linear_regression(x_train,y_train)
val = cross_validation_regressor(model,x_train,y_train)
y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)

# Write your code below


def linear_model(x_train, x_test, y_train, y_test):
    """Fit a linear regression and summarise its performance.

    Everything is computed from the arguments. The previous version reported
    the module-level ``val``/``mae``/``mse``/``r2`` (computed once at import
    time), ignored ``y_test``, and refitted the shared module-level
    ``model``.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        y_test: Test target used for the error metrics.

    Returns:
        Tuple ``(G, y_pred, stats1)``: the fitted model, its predictions for
        ``x_test``, and a one-row DataFrame with the columns
        ``cross_validation``, ``mae``, ``mse`` and ``r2``.
    """
    G = linear_regression(x_train, y_train)
    y_pred = G.predict(x_test)
    val = cross_validation_regressor(G, x_train, y_train)
    # Only the metrics are taken from the helper; y_pred comes from G above.
    _, mse, mae, r2 = regression_predictor(G, x_test, y_test)
    stats1 = pd.DataFrame(
        [[val, mae, mse, r2]],
        columns=['cross_validation', 'mae', 'mse', 'r2'],
    )
    return G, y_pred, stats1





Binary file modified q08_linear_model/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q08_linear_model/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q09_advanced_model_q01_lasso/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q09_advanced_model_q01_lasso/__pycache__/build.cpython-36.pyc
Binary file not shown.
17 changes: 15 additions & 2 deletions q09_advanced_model_q01_lasso/build.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
# %load q09_advanced_model_q01_lasso/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data

from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset

from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode

from greyatomlib.multivariate_regression_project.q07_regression_pred.build import regression_predictor
from greyatomlib.multivariate_regression_project.q09_advanced_model_q01_lasso.build import lasso

from sklearn.linear_model import Lasso
from sklearn import linear_model
import numpy as np
import pandas as pd

Expand All @@ -18,6 +22,15 @@

# Rebind the module-level feature frames to the output of label_encode.
x_train,x_test = label_encode(x_train,x_test)

# Write your solution here


def lasso_model(x_train, x_test, y_train, alpha=0.1):
    """Fit a Lasso regressor and summarise its performance.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        alpha: Regularisation strength passed to ``Lasso``.

    Returns:
        Tuple ``(G, y_pred, stats1)``: the fitted model, the predictions
        returned by ``regression_predictor`` for ``x_test``, and a one-row
        DataFrame with the columns ``cross_validation``, ``mae``, ``mse``
        and ``r2``.
    """
    model = Lasso(alpha=alpha)
    G = model.fit(x_train, y_train)
    # NOTE(review): cross_validation_regressor is not among the imports
    # visible in this view of the file - confirm it is in scope.
    val = cross_validation_regressor(model, x_train, y_train)
    # The separate ``model.predict(x_test)`` call was dropped: its result was
    # overwritten by the predictions returned here.
    # NOTE(review): ``y_test`` is not a parameter; it resolves to the
    # module-level ``y_test``. Consider passing it explicitly.
    y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)
    stats1 = pd.DataFrame(
        [[val, mae, mse, r2]],
        columns=['cross_validation', 'mae', 'mse', 'r2'],
    )
    return G, y_pred, stats1



Binary file not shown.
Binary file modified q09_advanced_model_q01_lasso/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q09_advanced_model_q02_ridge/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q09_advanced_model_q02_ridge/__pycache__/build.cpython-36.pyc
Binary file not shown.
14 changes: 11 additions & 3 deletions q09_advanced_model_q02_ridge/build.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# %load q09_advanced_model_q02_ridge/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data

from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset

from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode

from greyatomlib.multivariate_regression_project.q07_regression_pred.build import regression_predictor
from greyatomlib.multivariate_regression_project.q09_advanced_model_q02_ridge.build import ridge
from sklearn.linear_model import Ridge
import numpy as np
import pandas as pd
Expand All @@ -18,8 +20,14 @@

# Rebind the module-level feature frames to the output of label_encode.
x_train,x_test = label_encode(x_train,x_test)

# Write your code below

def ridge_model(x_train, x_test, y_train, alpha=0.1):
    """Fit a Ridge regressor and summarise its performance.

    Args:
        x_train: Training features.
        x_test: Test features.
        y_train: Training target.
        alpha: Regularisation strength passed to ``Ridge``.

    Returns:
        Tuple ``(G, y_pred, stats1)``: the fitted model, the predictions
        returned by ``regression_predictor`` for ``x_test``, and a one-row
        DataFrame with the columns ``cross_validation``, ``mae``, ``mse``
        and ``r2``.
    """
    model = Ridge(alpha=alpha)
    G = model.fit(x_train, y_train)
    # NOTE(review): cross_validation_regressor is not among the imports
    # visible in this view of the file - confirm it is in scope.
    val = cross_validation_regressor(model, x_train, y_train)
    # The separate ``model.predict(x_test)`` call was dropped: its result was
    # overwritten by the predictions returned here.
    # NOTE(review): ``y_test`` is not a parameter; it resolves to the
    # module-level ``y_test``. Consider passing it explicitly.
    y_pred, mse, mae, r2 = regression_predictor(model, x_test, y_test)
    stats1 = pd.DataFrame(
        [[val, mae, mse, r2]],
        columns=['cross_validation', 'mae', 'mse', 'r2'],
    )
    return G, y_pred, stats1




Binary file not shown.
Binary file modified q09_advanced_model_q02_ridge/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q10_data_missing_values/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q10_data_missing_values/__pycache__/build.cpython-36.pyc
Binary file not shown.
10 changes: 6 additions & 4 deletions q10_data_missing_values/build.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
# %load q10_data_missing_values/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset
from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode
import numpy as np
np.random.seed(9)
import pandas as pd
# Module-level setup, executed at import time: load the CSV, split it, and
# label-encode the feature frames.
df = load_data('data/student-mat.csv')

x_train, x_test, y_train, y_test = split_dataset(df)
# NOTE(review): label_encode is applied twice in a row here; this looks like
# a before/after pair from a diff view - confirm and drop one.
x_train,x_test = label_encode(x_train,x_test)
x_train,x_test = label_encode(x_train,x_test)
def describe_df(df):
    """Summarise ``df`` and count values in the module-level ``x_train``.

    Args:
        df: DataFrame to describe.

    Returns:
        Tuple of ``df.describe()`` and ``x_train.apply(pd.value_counts)``.
        The second element is computed from the module-level ``x_train``,
        not from ``df``.
    """
    summary = df.describe()
    value_counts = x_train.apply(pd.value_counts)
    return summary, value_counts
# Called at import time; the returned tuple is discarded.
describe_df(df)


# Write your code below

Binary file modified q10_data_missing_values/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q10_data_missing_values/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
22 changes: 13 additions & 9 deletions q11_feature_selection_q01_plot_corr/build.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,30 @@
# %load q11_feature_selection_q01_plot_corr/build.py

import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.pyplot import yticks, xticks, subplots, set_cmap
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data


from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset



from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode

# Module-level setup, executed at import time: load the CSV, split it, and
# label-encode the feature frames.
df = load_data('data/student-mat.csv')

x_train, x_test, y_train, y_test = split_dataset(df)
x_train,x_test = label_encode(x_train,x_test)

def plot_corr(df, size=11):
    """Plot and save the correlation matrix of the training split of ``df``.

    ``df`` is split with ``split_dataset``; the training features and the
    training target are concatenated column-wise and their pairwise
    correlations are drawn with ``matshow``. The figure is written to
    ``./images/data_image.png``.

    Args:
        df: DataFrame passed to ``split_dataset``.
        size: Width and height of the figure passed as ``figsize``.

    Returns:
        The Axes object the matrix was drawn on.
    """
    train_features, _, train_target, _ = split_dataset(df)
    corr = pd.concat([train_features, train_target], axis=1).corr()
    tick_labels = corr.columns
    tick_positions = range(len(tick_labels))

    fig, ax = subplots(figsize=(size, size))
    plt.set_cmap('YlOrRd')
    ax.matshow(corr)
    xticks(tick_positions, tick_labels, rotation=90)
    yticks(tick_positions, tick_labels)
    fig.savefig('./images/data_image.png')
    return ax

# =============================================================================
# To visualise data, pass the training data only: the test set is assumed to be unknown data, and you cannot make decisions based on unseen data.

# Remember to concatenate the training features and labels if you want to inspect the scatterplots, which I would prefer. You are free to explore label-to-label, feature-to-feature, etc. scatterplots as you want by passing the appropriate arguments.
#============================================================================
#visualise_data(pd.concat([x_train,y_train],axis=1),"../images/data_image.png")

# Write your solution here:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
10 changes: 8 additions & 2 deletions q11_feature_selection_q02_best_k_features/build.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# %load q11_feature_selection_q02_best_k_features/build.py
# Default imports
from sklearn.feature_selection import SelectPercentile
from sklearn.feature_selection import f_regression
Expand All @@ -19,9 +20,14 @@


# Seed NumPy's global random number generator at import time.
np.random.seed(9)
# Write your code below


def percentile_k_features(features, labels, k=50):
    """Return the names of the top-``k``-percentile features, best first.

    Features are scored with ``f_regression`` through ``SelectPercentile``.
    Column names are taken from ``features`` itself; the previous version
    read them from the module-level ``x_train``, which only gave the right
    answer when both frames happened to share the same column order.

    Args:
        features: Feature DataFrame (must support ``.iloc`` and
            ``.columns``).
        labels: Target values aligned with ``features``.
        k: Percentile of features to keep.

    Returns:
        List of selected column names ordered by descending score; equal
        scores are ordered by descending column position.
    """
    selector = SelectPercentile(f_regression, percentile=k)
    selector.fit(features, labels)
    idx_selected = selector.get_support(indices=True)
    scores = selector.scores_
    # Sort on (score, position) so the order matches sorting the
    # (score, index) pairs in reverse, as the original comprehension did.
    ranked = sorted(idx_selected, key=lambda i: (scores[i], i), reverse=True)
    return list(features.iloc[:, ranked].columns.values)



Binary file not shown.
Binary file not shown.
Binary file modified q12_feature_selection/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q12_feature_selection/__pycache__/build.cpython-36.pyc
Binary file not shown.
16 changes: 11 additions & 5 deletions q12_feature_selection/build.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# import matplotlib.pyplot as plt
# %load q12_feature_selection/build.py
from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data
from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset

Expand All @@ -7,12 +7,18 @@
from greyatomlib.multivariate_regression_project.q11_feature_selection_q02_best_k_features.build import percentile_k_features

from greyatomlib.multivariate_regression_project.q11_feature_selection_q01_plot_corr.build import plot_corr

from greyatomlib.multivariate_regression_project.q12_feature_selection.build import feature_selection

import pandas as pd
# Module-level setup, executed at import time.
df = load_data('data/student-mat.csv')

# X is every column of df except the last; y is the last column.
X = df.drop(df.columns[len(df.columns)-1], axis=1)
y = df.iloc[:,-1]
x_train, x_test, y_train, y_test = split_dataset(df)
x_train,x_test = label_encode(x_train,x_test)
# Encode the full feature frame; the second return value is discarded.
# NOTE(review): the second argument here is the x_train produced by the
# label_encode call on the previous line - confirm that is intended.
X,_ = label_encode(X,x_train)

def pick_features(X, y, k=50):
    """Delegate feature selection to ``percentile_k_features``.

    Args:
        X: Feature data forwarded unchanged.
        y: Target values forwarded unchanged.
        k: Percentile forwarded unchanged.

    Returns:
        Whatever ``percentile_k_features(X, y, k)`` returns.
    """
    return percentile_k_features(X, y, k)



# Write your code below
Binary file modified q12_feature_selection/tests/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q12_feature_selection/tests/__pycache__/test.cpython-36.pyc
Binary file not shown.
Binary file modified q13_plot_residuals/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file modified q13_plot_residuals/__pycache__/build.cpython-36.pyc
Binary file not shown.
Loading