diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index b0c3719..8c91383 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/__init__.cpython-36.pyc b/q01_load_data/__pycache__/__init__.cpython-36.pyc index 4596200..01ec408 100644 Binary files a/q01_load_data/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/__pycache__/build.cpython-36.pyc b/q01_load_data/__pycache__/build.cpython-36.pyc index 98e98a7..572be49 100644 Binary files a/q01_load_data/__pycache__/build.cpython-36.pyc and b/q01_load_data/__pycache__/build.cpython-36.pyc differ diff --git a/q01_load_data/build.py b/q01_load_data/build.py index 7cd3700..9eac22f 100644 --- a/q01_load_data/build.py +++ b/q01_load_data/build.py @@ -1,4 +1,10 @@ +# %load q01_load_data/build.py import pandas as pd # Write your code below - +path = 'data/student-mat.csv' +def load_data(path): + df = pd.read_table(path, sep=';') + return df +load_data(path) + diff --git a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc index d07fd2f..000afd6 100644 Binary files a/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc and b/q01_load_data/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_load_data/tests/__pycache__/test.cpython-36.pyc b/q01_load_data/tests/__pycache__/test.cpython-36.pyc index 9aa6996..4225548 100644 Binary files a/q01_load_data/tests/__pycache__/test.cpython-36.pyc and b/q01_load_data/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q02_data_split/__pycache__/__init__.cpython-36.pyc b/q02_data_split/__pycache__/__init__.cpython-36.pyc index 5d17273..2992e88 100644 Binary files a/q02_data_split/__pycache__/__init__.cpython-36.pyc and b/q02_data_split/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_data_split/__pycache__/build.cpython-36.pyc b/q02_data_split/__pycache__/build.cpython-36.pyc index e6bd2eb..a531d4a 100644 Binary files a/q02_data_split/__pycache__/build.cpython-36.pyc and b/q02_data_split/__pycache__/build.cpython-36.pyc differ diff --git a/q02_data_split/build.py b/q02_data_split/build.py index c2e7147..c6d01f6 100644 --- a/q02_data_split/build.py +++ b/q02_data_split/build.py @@ -1,8 +1,14 @@ +# %load q02_data_split/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from sklearn.model_selection import train_test_split import pandas as pd df = load_data('data/student-mat.csv') # Write your code below - - +def split_dataset(df): + X = df.drop(['G3'], axis=1) + y = df['G3'] + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + return X_train, X_test, y_train, y_test + + diff --git a/q02_data_split/tests/__pycache__/__init__.cpython-36.pyc b/q02_data_split/tests/__pycache__/__init__.cpython-36.pyc index e780e63..8033b60 100644 Binary files a/q02_data_split/tests/__pycache__/__init__.cpython-36.pyc and b/q02_data_split/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_data_split/tests/__pycache__/test.cpython-36.pyc b/q02_data_split/tests/__pycache__/test.cpython-36.pyc index a1b3fc5..23eb256 100644 Binary files a/q02_data_split/tests/__pycache__/test.cpython-36.pyc and b/q02_data_split/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q03_data_encoding/__pycache__/__init__.cpython-36.pyc b/q03_data_encoding/__pycache__/__init__.cpython-36.pyc index 884722b..3b00ade 100644 Binary files a/q03_data_encoding/__pycache__/__init__.cpython-36.pyc and b/q03_data_encoding/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_data_encoding/__pycache__/build.cpython-36.pyc b/q03_data_encoding/__pycache__/build.cpython-36.pyc index 302366c..6e973be 100644 Binary files a/q03_data_encoding/__pycache__/build.cpython-36.pyc and b/q03_data_encoding/__pycache__/build.cpython-36.pyc differ diff --git a/q03_data_encoding/build.py b/q03_data_encoding/build.py index bb4c8ca..80645e5 100644 --- a/q03_data_encoding/build.py +++ b/q03_data_encoding/build.py @@ -1,3 +1,4 @@ +# %load q03_data_encoding/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset from sklearn.preprocessing import LabelEncoder @@ -8,7 +9,11 @@ x_train, x_test, y_train, y_test = split_dataset(df) # Write your code below - - +def label_encode(x_train, x_test): + le = LabelEncoder() + X_transform = x_train.apply(le.fit_transform) + X_test_transform = x_test.apply(le.fit_transform) + return X_transform, X_test_transform + diff --git a/q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc b/q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc index 7d18c18..091f342 100644 Binary files a/q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc and b/q03_data_encoding/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_data_encoding/tests/__pycache__/test.cpython-36.pyc b/q03_data_encoding/tests/__pycache__/test.cpython-36.pyc index 8ade2b7..40f7db5 100644 Binary files a/q03_data_encoding/tests/__pycache__/test.cpython-36.pyc and b/q03_data_encoding/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc b/q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc index e4ec35b..d32b5b3 100644 Binary files a/q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc and b/q03_ohe_encoder/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_ohe_encoder/__pycache__/build.cpython-36.pyc b/q03_ohe_encoder/__pycache__/build.cpython-36.pyc index 1433b7b..4d5ff70 100644 Binary files a/q03_ohe_encoder/__pycache__/build.cpython-36.pyc and b/q03_ohe_encoder/__pycache__/build.cpython-36.pyc differ diff --git a/q03_ohe_encoder/build.py b/q03_ohe_encoder/build.py index 36e4b90..e56fcab 100644 --- a/q03_ohe_encoder/build.py +++ b/q03_ohe_encoder/build.py @@ -1,19 +1,24 @@ +# %load q03_ohe_encoder/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset +from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import LabelEncoder import pandas as pd -import numpy as np - -df = load_data('data/student-mat.csv') - -x_train, x_test, y_train, y_test = split_dataset(df) +path = 'data/student-mat.csv' +df = load_data(path) category_index = [x for x in range(len(df.columns)) if df[df.columns[x]].dtype == 'object'] +columns = [col for col in (df.columns) if df[col].dtype == 'object'] +#print(df.shape) +print(category_index) +df_new = pd.get_dummies(df, columns=columns) +X_train, X_test, y_train, y_test = split_dataset(df_new) +def ohe_encode(X_train, X_test, defaults=category_index): + X_transform, X_test_transform = label_encode(X_train, X_test) + return X_transform, X_test_transform -# Write your code below +ohe_encode(X_train, X_test, category_index) - - - diff --git a/q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc b/q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc index 8c87a88..c3a0bdf 100644 Binary files a/q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc and b/q03_ohe_encoder/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc b/q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc index 1956a19..8b256f1 100644 Binary files a/q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc and b/q03_ohe_encoder/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q04_data_visualisation/__pycache__/__init__.cpython-36.pyc b/q04_data_visualisation/__pycache__/__init__.cpython-36.pyc index d44a511..d985d65 100644 Binary files a/q04_data_visualisation/__pycache__/__init__.cpython-36.pyc and b/q04_data_visualisation/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_data_visualisation/__pycache__/build.cpython-36.pyc b/q04_data_visualisation/__pycache__/build.cpython-36.pyc index 2bfbd4e..88df4d6 100644 Binary files a/q04_data_visualisation/__pycache__/build.cpython-36.pyc and b/q04_data_visualisation/__pycache__/build.cpython-36.pyc differ diff --git a/q04_data_visualisation/build.py b/q04_data_visualisation/build.py index 9c15ad9..40e1e3e 100644 --- a/q04_data_visualisation/build.py +++ b/q04_data_visualisation/build.py @@ -1,4 +1,4 @@ -# -*- coding: utf-8 -*- +# %load q04_data_visualisation/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset from greyatomlib.multivariate_regression_project.q03_data_encoding.build import label_encode @@ -10,7 +10,13 @@ x_train,x_test = label_encode(x_train,x_test) # Write your code below - - - +plt.switch_backend('agg') +def visualise_data(data, figname): + x_train, x_test, y_train, y_test = split_dataset(data) + plt = scatter_matrix(data, alpha=0.2) + #plt.show() + #plt.figtext = figname + return plt + +visualise_data(data, 'Test') diff --git a/q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc b/q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc index 6631d03..e647592 100644 Binary files a/q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc and b/q04_data_visualisation/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc b/q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc index 5353356..59df698 100644 Binary files a/q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc and b/q04_data_visualisation/tests/__pycache__/test.cpython-36.pyc differ diff --git a/q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc b/q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc index 06a2a9b..e55f7b5 100644 Binary files a/q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc and b/q05_linear_regression_model/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_linear_regression_model/__pycache__/build.cpython-36.pyc b/q05_linear_regression_model/__pycache__/build.cpython-36.pyc index c40d112..d9aeae4 100644 Binary files a/q05_linear_regression_model/__pycache__/build.cpython-36.pyc and b/q05_linear_regression_model/__pycache__/build.cpython-36.pyc differ diff --git a/q05_linear_regression_model/build.py b/q05_linear_regression_model/build.py index 7a0a243..302fe04 100644 --- a/q05_linear_regression_model/build.py +++ b/q05_linear_regression_model/build.py @@ -1,3 +1,4 @@ +# %load q05_linear_regression_model/build.py from greyatomlib.multivariate_regression_project.q01_load_data.build import load_data from greyatomlib.multivariate_regression_project.q02_data_split.build import split_dataset @@ -12,4 +13,10 @@ # Write your code below - +def linear_regression(X, y): + linear_reg = LinearRegression() + model = linear_reg.fit(X, y) + return model + +linear_regression(x_train, y_train) + diff --git a/q05_linear_regression_model/tests/__pycache__/__init__.cpython-36.pyc b/q05_linear_regression_model/tests/__pycache__/__init__.cpython-36.pyc index 296bcce..f04d816 100644 Binary files a/q05_linear_regression_model/tests/__pycache__/__init__.cpython-36.pyc and b/q05_linear_regression_model/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc b/q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc index 54551b9..3052924 100644 Binary files a/q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc and b/q05_linear_regression_model/tests/__pycache__/test.cpython-36.pyc differ