In [4]:
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [5]:
# List the example datasets that seaborn can load by name.
dataset_names = sns.get_dataset_names()
print(dataset_names)

['anagrams', 'anscombe', 'attention', 'brain_networks', 'car_crashes', 'diamonds', 'dots', 'dowjones', 'exercise', 'flights', 'fmri', 'geyser', 'glue', 'healthexp', 'iris', 'mpg', 'penguins', 'planets', 'seaice', 'taxis', 'tips', 'titanic']


In [6]:
# Load the 'tips' example dataset as a pandas DataFrame.
df = sns.load_dataset('tips')

In [7]:
# Inspect column dtypes and non-null counts before preprocessing.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB


In [8]:
# Peek at five randomly chosen rows (no seed is passed, so the rows differ per run).
df.sample(5)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
71,17.07,3.0,Female,No,Sat,Dinner,3
82,10.07,1.83,Female,No,Thur,Lunch,1
215,12.9,1.1,Female,Yes,Sat,Dinner,2
230,24.01,2.0,Male,Yes,Sat,Dinner,4
85,34.83,5.17,Female,No,Thur,Lunch,4


In [9]:
# Count rows per weekday to see the levels of the categorical `day` column.
df['day'].value_counts()

day
Sat     87
Sun     76
Thur    62
Fri     19
Name: count, dtype: int64

In [10]:
# Encode the categorical columns numerically:
#   * sex / smoker / time are two-level, so each becomes a 0/1 flag;
#   * day is expanded into one integer indicator column per level.
# Run this cell once per load: afterwards `day` no longer exists and the
# flag columns no longer hold their original labels.
df = pd.get_dummies(
    df.assign(
        sex=np.where(df['sex'] == 'Male', 1, 0),
        smoker=np.where(df['smoker'] == 'Yes', 1, 0),
        time=np.where(df['time'] == 'Dinner', 1, 0),
    ),
    columns=['day'],
    dtype=int,
)


In [11]:
# Display the encoded frame to check the result of the encoding step.
df

Unnamed: 0,total_bill,tip,sex,smoker,time,size,day_Thur,day_Fri,day_Sat,day_Sun
0,16.99,1.01,0,0,1,2,0,0,0,1
1,10.34,1.66,1,0,1,3,0,0,0,1
2,21.01,3.50,1,0,1,3,0,0,0,1
3,23.68,3.31,1,0,1,2,0,0,0,1
4,24.59,3.61,0,0,1,4,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,1,0,1,3,0,0,1,0
240,27.18,2.00,0,1,1,2,0,0,1,0
241,22.67,2.00,1,1,1,2,0,0,1,0
242,17.82,1.75,1,0,1,2,0,0,1,0


In [12]:
# Separate the features from the regression target (`tip`).
X = df.drop(columns=['tip'])
y = df['tip']

# Hold out 20% of the rows for evaluation. A fixed random_state makes the
# split (and therefore every downstream metric) reproducible across
# "Restart Kernel -> Run All".
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [18]:
class LinearRegression():
    """Linear regression fitted by full-batch gradient descent on the MSE loss.

    Parameters
    ----------
    learning_rate : float, default 0.01
        Step size applied to every gradient update.
    iteration : int, default 1000
        Maximum number of gradient steps performed by ``fit``.

    Attributes set by ``fit``
    -------------------------
    W : np.ndarray of shape (n,)
        Learned feature weights.
    b : float
        Learned intercept.
    m, n : int
        Number of training rows and number of features.
    X, Y : np.ndarray
        Training data converted to float arrays.
    """

    def __init__(self, learning_rate=0.01, iteration=1000):
        self.learning_rate = learning_rate
        self.iteration = iteration

    def fit(self, X, Y):
        """Fit weights and intercept to the training data.

        Parameters
        ----------
        X : array-like of shape (m, n)
            Feature matrix (DataFrame, ndarray or nested list).
        Y : array-like of length m
            Target values.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``Y`` has a different length.
        """
        # Train on plain float arrays. Operating directly on pandas objects
        # aligns operands by label, which makes training depend on X and Y
        # sharing the same index and turns W into a label-indexed Series.
        X_arr = np.asarray(X, dtype=float)
        Y_arr = np.asarray(Y, dtype=float).reshape(-1)

        if X_arr.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (samples, features); got ndim=%d" % X_arr.ndim
            )
        if X_arr.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        if Y_arr.shape[0] != X_arr.shape[0]:
            raise ValueError(
                "X and Y have inconsistent lengths: %d vs %d"
                % (X_arr.shape[0], Y_arr.shape[0])
            )

        self.m, self.n = X_arr.shape
        self.W = np.zeros(self.n)
        self.b = 0.0
        self.X = X_arr
        self.Y = Y_arr

        for _ in range(self.iteration):
            self.update_weight()
            # A step size that is too large for the feature scale makes the
            # parameters explode; further iterations cannot recover, so stop
            # and tell the user instead of silently returning inf/NaN.
            if not (np.all(np.isfinite(self.W)) and np.isfinite(self.b)):
                import warnings

                warnings.warn(
                    "Gradient descent diverged (non-finite parameters); "
                    "scale the features or lower learning_rate.",
                    RuntimeWarning,
                    stacklevel=2,
                )
                break
        return self

    def update_weight(self):
        """Perform one gradient-descent step using the stored training data."""
        residual = self.Y - self.predict(self.X)

        # Gradients of mean((Y - (XW + b))^2) with respect to W and b.
        dW = (-2 / self.m) * self.X.T.dot(residual)
        db = (-2 / self.m) * np.sum(residual)

        self.W = self.W - self.learning_rate * dW
        self.b = self.b - self.learning_rate * db

    def predict(self, X):
        """Return ``X @ W + b`` for the given feature matrix.

        Objects that provide ``.dot`` (ndarray, DataFrame) are used as-is, so
        a DataFrame input yields a Series carrying the DataFrame's index;
        other array-likes (e.g. nested lists) are converted to a float array.
        """
        if not hasattr(X, "dot"):
            X = np.asarray(X, dtype=float)
        return X.dot(self.W) + self.b


In [20]:
# Standardize the features before gradient descent. On the raw columns
# (total_bill is in the tens) a step size of 0.01 is too large and the
# updates diverge to inf/NaN. Statistics come from the training split only
# so no information from the test set leaks into the model.
feature_mean = X_train.mean()
feature_std = X_train.std(ddof=0).replace(0, 1)  # guard constant columns
X_train_scaled = (X_train - feature_mean) / feature_std
X_test_scaled = (X_test - feature_mean) / feature_std

model = LinearRegression(iteration=1000, learning_rate=0.01)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# `tip` is a continuous target, so this is a regression problem.
# accuracy_score only supports classification labels and raises ValueError
# on continuous values, so report regression metrics instead.
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("R^2:", r2)
print("RMSE:", rmse)
