# Q(C)

### Import libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.linear_model import LinearRegression

In [2]:
boston = pd.read_csv("Boston.csv")
boston.head(20)

Unnamed: 0.1,Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
0,1,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,2,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,3,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,4,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,5,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2
5,6,0.02985,0.0,2.18,0,0.458,6.43,58.7,6.0622,3,222,18.7,394.12,5.21,28.7
6,7,0.08829,12.5,7.87,0,0.524,6.012,66.6,5.5605,5,311,15.2,395.6,12.43,22.9
7,8,0.14455,12.5,7.87,0,0.524,6.172,96.1,5.9505,5,311,15.2,396.9,19.15,27.1
8,9,0.21124,12.5,7.87,0,0.524,5.631,100.0,6.0821,5,311,15.2,386.63,29.93,16.5
9,10,0.17004,12.5,7.87,0,0.524,6.004,85.9,6.5921,5,311,15.2,386.71,17.1,18.9


In [3]:
boston.columns

Index(['Unnamed: 0', 'crim', 'zn', 'indus', 'chas', 'nox', 'rm', 'age', 'dis',
       'rad', 'tax', 'ptratio', 'black', 'lstat', 'medv'],
      dtype='object')

#### CRIM - per capita crime rate by town
#### ZN - proportion of residential land zoned for lots over 25,000 sq.ft.
#### INDUS - proportion of non-retail business acres per town.
#### CHAS - Charles River dummy variable (1 if tract bounds river; 0 otherwise)
#### NOX - nitric oxides concentration (parts per 10 million)
#### RM - average number of rooms per dwelling
#### AGE - proportion of owner-occupied units built prior to 1940
#### DIS - weighted distances to five Boston employment centres
#### RAD - index of accessibility to radial highways
#### TAX - full-value property-tax rate per 10,000 dollars
#### PTRATIO - pupil/teacher ratio by town
#### B - 1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town
#### LSTAT - % lower status of the population
#### MEDV - Median value of owner-occupied homes in $1000's

In [4]:
boston.describe()

Unnamed: 0.1,Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,black,lstat,medv
count,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0,506.0
mean,253.5,3.613524,11.363636,11.136779,0.06917,0.554695,6.284634,68.574901,3.795043,9.549407,408.237154,18.455534,356.674032,12.653063,22.532806
std,146.213884,8.601545,23.322453,6.860353,0.253994,0.115878,0.702617,28.148861,2.10571,8.707259,168.537116,2.164946,91.294864,7.141062,9.197104
min,1.0,0.00632,0.0,0.46,0.0,0.385,3.561,2.9,1.1296,1.0,187.0,12.6,0.32,1.73,5.0
25%,127.25,0.082045,0.0,5.19,0.0,0.449,5.8855,45.025,2.100175,4.0,279.0,17.4,375.3775,6.95,17.025
50%,253.5,0.25651,0.0,9.69,0.0,0.538,6.2085,77.5,3.20745,5.0,330.0,19.05,391.44,11.36,21.2
75%,379.75,3.677083,12.5,18.1,0.0,0.624,6.6235,94.075,5.188425,24.0,666.0,20.2,396.225,16.955,25.0
max,506.0,88.9762,100.0,27.74,1.0,0.871,8.78,100.0,12.1265,24.0,711.0,22.0,396.9,37.97,50.0


In [5]:
boston.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  506 non-null    int64  
 1   crim        506 non-null    float64
 2   zn          506 non-null    float64
 3   indus       506 non-null    float64
 4   chas        506 non-null    int64  
 5   nox         506 non-null    float64
 6   rm          506 non-null    float64
 7   age         506 non-null    float64
 8   dis         506 non-null    float64
 9   rad         506 non-null    int64  
 10  tax         506 non-null    int64  
 11  ptratio     506 non-null    float64
 12  black       506 non-null    float64
 13  lstat       506 non-null    float64
 14  medv        506 non-null    float64
dtypes: float64(11), int64(4)
memory usage: 59.4 KB


In [6]:
boston.shape

(506, 15)

In [7]:
boston.isnull().sum()

Unnamed: 0    0
crim          0
zn            0
indus         0
chas          0
nox           0
rm            0
age           0
dis           0
rad           0
tax           0
ptratio       0
black         0
lstat         0
medv          0
dtype: int64

In [8]:
boston['age'].value_counts

<bound method IndexOpsMixin.value_counts of 0      65.2
1      78.9
2      61.1
3      45.8
4      54.2
       ... 
501    69.1
502    76.7
503    91.0
504    89.3
505    80.8
Name: age, Length: 506, dtype: float64>

# LINEAR REGRESSION

##### IS THERE A RELATION BETWEEN AGE AND CRIME!

### simple linear regression

In [18]:
y = boston.crim
x = boston.age

In [19]:
model = LinearRegression()

In [20]:
model.fit(np.array(x).reshape(-1,1),y)

LinearRegression()

In [21]:
model.intercept_ #b0

-3.7779063179682684

In [22]:
model.coef_ #b1

array([0.10778623])

In [23]:
model = sm.OLS(y, x).fit()

In [24]:
model.summary()

0,1,2,3
Dep. Variable:,crim,R-squared (uncentered):,0.232
Model:,OLS,Adj. R-squared (uncentered):,0.231
Method:,Least Squares,F-statistic:,152.8
Date:,"Sat, 30 Apr 2022",Prob (F-statistic):,7.39e-31
Time:,00:50:06,Log-Likelihood:,-1780.7
No. Observations:,506,AIC:,3563.0
Df Residuals:,505,BIC:,3568.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
age,0.0606,0.005,12.363,0.000,0.051,0.070

0,1,2,3
Omnibus:,588.282,Durbin-Watson:,0.901
Prob(Omnibus):,0.0,Jarque-Bera (JB):,38772.874
Skew:,5.54,Prob(JB):,0.0
Kurtosis:,44.428,Cond. No.,1.0


In [37]:
x = boston.zn
y = boston.crim

In [38]:
lr = sm.OLS(y, x).fit()
lr

<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x2652e1fbd60>

In [39]:
model.summary()

0,1,2,3
Dep. Variable:,crim,R-squared (uncentered):,0.232
Model:,OLS,Adj. R-squared (uncentered):,0.231
Method:,Least Squares,F-statistic:,152.8
Date:,"Sat, 30 Apr 2022",Prob (F-statistic):,7.39e-31
Time:,00:57:04,Log-Likelihood:,-1780.7
No. Observations:,506,AIC:,3563.0
Df Residuals:,505,BIC:,3568.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
age,0.0606,0.005,12.363,0.000,0.051,0.070

0,1,2,3
Omnibus:,588.282,Durbin-Watson:,0.901
Prob(Omnibus):,0.0,Jarque-Bera (JB):,38772.874
Skew:,5.54,Prob(JB):,0.0
Kurtosis:,44.428,Cond. No.,1.0
