## Import Libraries

In [1]:
import pandas as pd
import numpy as np

## Import Data

In [2]:
df = pd.read_csv("credit.csv")
df.head()

Unnamed: 0,Income,Limit,Rating,Cards,Age,Education,Gender,Student,Married,Ethnicity,Balance
0,14.891,3606,283,2,34,11,Male,No,Yes,Caucasian,333
1,106.025,6645,483,3,82,15,Female,Yes,Yes,Asian,903
2,104.593,7075,514,4,71,11,Male,No,No,Asian,580
3,148.924,9504,681,3,36,11,Female,No,No,Asian,964
4,55.882,4897,357,2,68,16,Male,No,Yes,Caucasian,331


## Check for NULL values

In [3]:
df.isna().any()

Income       False
Limit        False
Rating       False
Cards        False
Age          False
Education    False
Gender       False
Student      False
Married      False
Ethnicity    False
Balance      False
dtype: bool

## Columns

In [4]:
df.columns

Index(['Income', 'Limit', 'Rating', 'Cards', 'Age', 'Education', 'Gender',
       'Student', 'Married', 'Ethnicity', 'Balance'],
      dtype='object')

## Check shape of the dataframe

In [5]:
df.shape

(400, 11)

## Check unique values and min/max values of the columns

### Define the helper functions

In [6]:
def unique_vals(cols, data=None):
    """Print the distinct values found in each of the given columns.

    Parameters
    ----------
    cols : str or iterable of str
        Column label(s) to inspect. A single label may be passed as a plain
        string instead of a one-element list.
    data : pandas.DataFrame, optional
        Frame to inspect. When omitted, the notebook-level ``df`` is used, so
        existing ``unique_vals([...])`` calls keep working unchanged.

    Raises
    ------
    KeyError
        If a requested column is not present in the frame.
    """
    # Resolve the frame at call time so the helper is not tied to the global
    # ``df`` and can be reused on any other frame.
    frame = df if data is None else data
    # A bare string would otherwise be iterated character by character.
    if isinstance(cols, str):
        cols = [cols]
    for col in cols:
        print(f"Unique value for {col} is", frame[col].unique())

In [7]:
unique_vals(['Gender', 'Student', 'Married', 'Ethnicity'])

Unique value for Gender is ['Male' 'Female']
Unique value for Student is ['No' 'Yes']
Unique value for Married is ['Yes' 'No']
Unique value for Ethnicity is ['Caucasian' 'Asian' 'African American']


In [8]:
def max_min_vals(cols, data=None):
    """Print the minimum and maximum value of each of the given columns.

    Parameters
    ----------
    cols : str or iterable of str
        Column label(s) to summarise. A single label may be passed as a plain
        string instead of a one-element list.
    data : pandas.DataFrame, optional
        Frame to summarise. When omitted, the notebook-level ``df`` is used,
        so existing ``max_min_vals([...])`` calls keep working unchanged.

    Raises
    ------
    KeyError
        If a requested column is not present in the frame.
    """
    # Resolve the frame at call time so the helper is not tied to the global
    # ``df`` and can be reused on any other frame.
    frame = df if data is None else data
    # A bare string would otherwise be iterated character by character.
    if isinstance(cols, str):
        cols = [cols]
    for col in cols:
        series = frame[col]
        print(f"For column {col}: Min Value = {series.min()} and Max Value = {series.max()}")

## Perform One Hot Encoding

In [9]:
# Separate the regression target from the predictor columns.
target_col = 'Limit'
y = df[target_col]
X = df.drop(columns=[target_col])

In [10]:
X.columns

Index(['Income', 'Rating', 'Cards', 'Age', 'Education', 'Gender', 'Student',
       'Married', 'Ethnicity', 'Balance'],
      dtype='object')

In [11]:
# One-hot encode the categorical columns explicitly. The list must be passed
# as `columns=`: the second positional parameter of pd.get_dummies is
# `prefix`, so the positional form only produced the right names because the
# frame happened to contain exactly these four object columns in this order.
categorical_cols = ['Gender', 'Student', 'Married', 'Ethnicity']
X_ohe = pd.get_dummies(X, columns=categorical_cols)
X_ohe.head()

Unnamed: 0,Income,Rating,Cards,Age,Education,Balance,Gender_Female,Gender_Male,Student_No,Student_Yes,Married_No,Married_Yes,Ethnicity_African American,Ethnicity_Asian,Ethnicity_Caucasian
0,14.891,283,2,34,11,333,False,True,True,False,False,True,False,False,True
1,106.025,483,3,82,15,903,True,False,False,True,False,True,False,True,False
2,104.593,514,4,71,11,580,False,True,True,False,True,False,False,True,False
3,148.924,681,3,36,11,964,True,False,True,False,True,False,False,True,False
4,55.882,357,2,68,16,331,False,True,True,False,False,True,False,False,True


In [12]:
X_ohe.columns

Index(['Income', 'Rating', 'Cards', 'Age', 'Education', 'Balance',
       'Gender_Female', 'Gender_Male', 'Student_No', 'Student_Yes',
       'Married_No', 'Married_Yes', 'Ethnicity_African American',
       'Ethnicity_Asian', 'Ethnicity_Caucasian'],
      dtype='object')

In [13]:
# Drop one indicator per categorical feature (its reference level); the
# indicators of a feature always sum to one, so keeping all of them would be
# redundant next to the model's intercept.
reference_levels = ['Gender_Female', 'Student_No', 'Married_No', 'Ethnicity_African American']
# errors="ignore" keeps this cell idempotent: X_ohe is reassigned from itself,
# so re-running the cell would otherwise raise KeyError on the already-dropped
# columns. Double-check the labels above, since a typo would now be silent.
X_ohe = X_ohe.drop(columns=reference_levels, errors="ignore")
# Preview only the first rows instead of rendering the whole frame.
X_ohe.head()

Unnamed: 0,Income,Rating,Cards,Age,Education,Balance,Gender_Male,Student_Yes,Married_Yes,Ethnicity_Asian,Ethnicity_Caucasian
0,14.891,283,2,34,11,333,True,False,True,False,True
1,106.025,483,3,82,15,903,False,True,True,True,False
2,104.593,514,4,71,11,580,True,False,False,True,False
3,148.924,681,3,36,11,964,False,False,False,True,False
4,55.882,357,2,68,16,331,True,False,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...
395,12.096,307,3,32,13,560,True,False,True,False,True
396,13.364,296,5,65,17,480,True,False,False,False,False
397,57.872,321,5,67,12,138,False,False,True,False,True
398,37.728,192,1,44,13,0,True,False,True,False,True


## Train Test Split

In [14]:
from sklearn.model_selection import train_test_split

In [15]:
# Hold out 40% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_ohe,
    y,
    test_size=0.4,
    random_state=42,
)

In [16]:
# Hand-built sample to score with the model. Values follow the X_ohe column
# order: Income, Rating, Cards, Age, Education, Balance, Gender_Male,
# Student_Yes, Married_Yes, Ethnicity_Asian, Ethnicity_Caucasian.
# Alternative sample (same values as row 0 of the encoded frame shown above):
# test_1 = [14.891, 283, 2, 34, 11,333, 1, 0, 1, 0, 1]
# NOTE(review): the first nine values match row 1 of the encoded frame, but
# row 1 has Ethnicity_Asian=1 / Ethnicity_Caucasian=0 while this sample is
# flagged Caucasian (0, 1) -- confirm whether that difference is intentional.
test_1 = [106.025, 483, 3, 82, 15, 903, 0, 1, 1, 0, 1]

## Model Creation

In [17]:
from sklearn.linear_model import LinearRegression

lr = LinearRegression()

In [18]:
lr.fit(X_train, y_train)

In [19]:
# The model was fitted on a DataFrame, so score the sample as a one-row frame
# carrying the training column names. This lets scikit-learn validate the
# feature names (a bare list triggers its "X does not have valid feature
# names" warning) and fails loudly if the sample has the wrong number of
# values. The predicted value itself is unchanged.
sample = pd.DataFrame([test_1], columns=X_train.columns)
lr.predict(sample)



array([6582.98472127])

## Model evaluation

In [20]:
lr.score(X_test, y_test)

0.9960666390121163

## Saving the model

In [21]:
import joblib

In [22]:
filename = "LimitEstimator.pkl"
joblib.dump(lr, filename)

['LimitEstimator.pkl']