In [60]:
# include necessary libraries
import numpy as np
import pandas as pd

In [61]:
# Read CSV
credit_df = pd.read_csv('Credit.csv')
credit_df[0::50]

Unnamed: 0,Income,Limit,Rating,Cards,Age,Education,Married,Balance
0,14.891,3606,283,2,34,11,1,333
50,36.362,5183,376,3,49,15,1,654
100,21.153,3736,256,1,41,11,0,298
150,63.931,5728,435,3,28,14,1,581
200,23.949,5343,383,2,40,18,1,829
250,10.363,2430,191,2,47,18,1,0
300,21.786,4632,355,1,50,17,1,580
350,30.002,1561,155,4,70,13,1,0


In [62]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Define data columns vs target column
features = ['Income', 'Limit', 'Rating', 'Cards', 'Age', 'Education', 'Married']
target = 'Balance'

X = credit_df[features]
y = credit_df[target]

# Create testing and training sets 
(X_train, X_test, y_train, y_test) = train_test_split(X, y, test_size=0.24, random_state=4)

# Create scaler for normalizing data
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X_train)
print(X_scaled)

[[0.02143181 0.16411395 0.17772778 ... 0.68918919 0.8        0.        ]
 [0.00951327 0.17820493 0.18672666 ... 0.45945946 0.86666667 1.        ]
 [0.048769   0.04824629 0.04049494 ... 0.78378378 0.53333333 0.        ]
 ...
 [0.22886885 0.23908715 0.23734533 ... 0.81081081 0.8        0.        ]
 [0.632403   0.75187624 0.73903262 ... 0.40540541 0.73333333 0.        ]
 [0.2963921  0.4748813  0.43644544 ... 0.81081081 0.4        0.        ]]


In [63]:
# Create Linear Regression model
from sklearn.linear_model import LinearRegression

my_linreg = LinearRegression()
my_linreg.fit(X_scaled, y_train)

# Coefficients of the model
my_linreg.coef_

array([-1327.01928481,   747.64112114,  2752.94751262,    85.99095838,
        -127.92745337,     8.06527562,   -41.47581319])

In [64]:
# Test Accuracy of the model
from sklearn.metrics import mean_squared_error

X_scaled = scaler.transform(X_test)
y_predict_lr = my_linreg.predict(X_scaled)

# Calculating Mean Square Error
mse = mean_squared_error(y_test, y_predict_lr)
rmse = np.sqrt(mse)

print("Linear Regression Mean Square Error: ", rmse) 

Linear Regression Mean Square Error:  161.5138549117532


In [65]:
# Cross Validation
# K folds where K = 10

from sklearn.model_selection import cross_val_score

lr_acc_list = cross_val_score(my_linreg, X, y, cv=10, scoring='neg_mean_squared_error')

print("Linear Regression: \n", lr_acc_list, "\n Mean Error: ", lr_acc_list.mean())

Linear Regression: 
 [-23646.90415342 -32003.04401232 -35462.64435619 -37327.60719635
 -14341.32205938 -33628.37104224 -31631.99317834 -12491.00334951
 -20749.61212176 -23204.9474346 ] 
 Mean Error:  -26448.744890410482
