- For development we use notebooks (`.ipynb`)
- For deployment we use Python scripts (`.py`)
    - Flask / Django / FastAPI framework
    - HTML / CSS

In [7]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as sm # Linear Regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [3]:
# Create reproducible sample data.
# A seeded generator makes "Restart Kernel -> Run All" produce the same rows on
# every run; without a seed each execution draws different values.
SEED = 42
rng = np.random.default_rng(SEED)

# 40 rows: Speed is drawn from [40, 80) and distance from [20, 40) (upper bound
# exclusive, matching the original randint bounds). The two columns are drawn
# independently of each other.
df = pd.DataFrame({"Speed": rng.integers(40, 80, 40),
                   "distance": rng.integers(20, 40, 40)})

In [5]:
# Scenario: a vehicle travelling at a given speed brakes suddenly; how far does
# it keep moving forward before it comes to a stop?
# Speed    - kilometres per hour (km/h)
# distance - metres (m)
df.head()

Unnamed: 0,Speed,distance
0,66,35
1,58,39
2,64,35
3,55,32
4,77,26


In [8]:
# Row and column count of the sample frame.
df.shape

(40, 2)

In [16]:
# Separate the predictor (independent column) from the target (dependent column).
# Double brackets keep each selection a one-column DataFrame rather than a Series.
X, Y = df[["Speed"]], df[["distance"]]

In [17]:
# Shapes of the predictor and target frames.
X.shape,Y.shape

((40, 1), (40, 1))

In [18]:
# Hold out 10% of the rows for testing. random_state pins the shuffle so the
# same rows land in each split on every run; without it the split (and every
# score computed from it) changes between executions.
Xtrain,Xtest,Ytrain,Ytest = train_test_split(X,Y,test_size=0.1,random_state=42)

In [19]:
# Shapes of the four pieces produced by the train/test split.
Xtrain.shape, Xtest.shape,Ytrain.shape,Ytest.shape

((36, 1), (4, 1), (36, 1), (4, 1))

# Modeling

In [20]:
# scikit-learn linear regression estimator, constructed with default arguments;
# it is not fitted yet at this point.
model = LinearRegression()

In [21]:
# Train the estimator on the training split only.
model.fit(Xtrain, Ytrain)

In [25]:
# Empty frame that the following cells fill with held-out inputs and targets.
test_df = pd.DataFrame()

In [26]:
# Place the held-out speeds and their true distances side by side.
# Each split holds exactly one column, so it is picked out by name.
test_df["Speed_test"] = Xtest["Speed"]
test_df["Distance_test"] = Ytest["distance"]
test_df

Unnamed: 0,Speed_test,Distance_test
11,63,21
7,49,25
16,44,24
22,74,38


In [27]:
# Add the model's predictions for the held-out speeds next to the true values.
# NOTE(review): "Distance_preditect" looks like a typo for "Distance_predicted";
# left unchanged because renaming the column would change its label for any later use.
test_df["Distance_preditect"] = model.predict(Xtest)

In [28]:
# Show the held-out speeds with their actual and predicted distances.
test_df

Unnamed: 0,Speed_test,Distance_test,Distance_preditect
11,63,21,29.874051
7,49,25,30.807604
16,44,24,31.141015
22,74,38,29.140545


In [32]:
# Combine the training split into a single frame with columns "Speed" and
# "Distance"; the target column is attached by index alignment.
train_data = Xtrain.assign(Distance=Ytrain["distance"])

In [31]:
# Coefficient of determination (R-squared) of the fitted estimator on the
# held-out split. The score can be negative when the predictions fit worse
# than always predicting the mean of the observed targets.
model.score(Xtest,Ytest)

-0.42330043038966236

In [34]:
# Fit an OLS model through the statsmodels formula interface on the training
# frame: "Distance" is the response and "Speed" the single predictor.
# NOTE(review): this rebinds `model`, replacing the scikit-learn estimator
# fitted earlier, so every later use of `model` refers to the statsmodels
# result instead.
model = sm.ols(formula='Distance~Speed',data=train_data).fit()

In [35]:
# Print the regression summary table (coefficients, R-squared, fit statistics).
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:               Distance   R-squared:                       0.023
Model:                            OLS   Adj. R-squared:                 -0.006
Method:                 Least Squares   F-statistic:                    0.7948
Date:                Tue, 30 Jan 2024   Prob (F-statistic):              0.379
Time:                        21:11:25   Log-Likelihood:                -109.02
No. Observations:                  36   AIC:                             222.0
Df Residuals:                      34   BIC:                             225.2
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     34.0750      4.681      7.279      0.0

# Model Saving

In [36]:
import joblib

In [37]:
# Persist the fitted model to disk. `model` is whatever the name was last bound
# to -- when the cells run top to bottom, that is the statsmodels OLS result,
# not the scikit-learn estimator.
# NOTE(review): ".lance" is an unusual suffix for a joblib file; confirm that
# whatever loads this file expects that name.
joblib.dump(model, "lesson1.lance")

['lesson1.lance']