# Linear Regression with Python

## Step 1: import libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## Step 2: read data and interpret data

In [None]:
# Load the housing dataset into a DataFrame. The path is relative, so the CSV
# must sit in the notebook's current working directory.
df = pd.read_csv('USA_Housing.csv')

In [None]:
# Preview the first rows (head() defaults to 5) to sanity-check the load.
df.head()

In [None]:
# Print column names, non-null counts and dtypes to spot missing values
# and non-numeric columns.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns.
df.describe()

## Step 3: exploratory data analysis with Seaborn

In [None]:
# Grid of pairwise scatter plots between variables, with each variable's
# own distribution on the diagonal.
sns.pairplot(df)

In [None]:
# Distribution of the 'Price' column (used as the regression target below).
sns.displot(df['Price'])

In [None]:
# Correlation heatmap of the numeric columns.
# Select numeric columns explicitly before calling corr(): since pandas 2.0
# DataFrame.corr() no longer drops non-numeric columns silently
# (numeric_only defaults to False) and raises if the frame contains any,
# e.g. a free-text column in the CSV. select_dtypes works on old and new
# pandas alike and is a no-op when every column is already numeric.
sns.heatmap(df.select_dtypes(include='number').corr(), cmap='Blues')

## Step 4: split data for training and testing

In [None]:
# List the column labels; the feature and target names below are taken
# from this output.
df.columns

In [None]:
# Feature matrix: the five predictor columns, selected by label.
X = df.loc[:, [
    'Avg. Area Income',
    'Avg. Area House Age',
    'Avg. Area Number of Rooms',
    'Avg. Area Number of Bedrooms',
    'Area Population',
]]

In [None]:
# Target vector: the value the model learns to predict.
y = df.loc[:, 'Price']

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 40% of the rows for testing. The fixed random_state makes the
# shuffle, and therefore every number reported below, reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=101,
)

## Step 5: train model with linear regression

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Ordinary least squares estimator with scikit-learn's default settings.
lm = LinearRegression()

In [None]:
# Fit the model on the training split only; the test split stays unseen
# until evaluation.
lm.fit(X=X_train, y=y_train)

## Step 6: model interpretation

In [None]:
# Fitted intercept: the predicted target when every feature is zero.
lm.intercept_

In [None]:
# Fitted coefficients, one per feature, in the column order of X_train.
lm.coef_

In [None]:
# Label each coefficient with its feature name. A coefficient is the change
# in predicted price for a one-unit increase in that feature, holding the
# other features fixed.
pd.DataFrame(
    data=lm.coef_,
    index=X.columns,
    columns=['Coeff'],
)

## Step 7: model performance evaluation

In [None]:
# Predict the target for the held-out rows.
prediction = lm.predict(X=X_test)

In [None]:
# Actual (x-axis) vs. predicted (y-axis) values; points close to a straight
# diagonal indicate a good fit.
plt.scatter(x=y_test, y=prediction)

In [None]:
# Distribution of the residuals (actual minus predicted) on the test split.
sns.displot(y_test-prediction)

In [None]:
from sklearn import metrics

In [None]:
# Mean absolute error: average size of the prediction error, in the
# target's own units.
metrics.mean_absolute_error(y_true=y_test, y_pred=prediction)

In [None]:
# Mean squared error: penalises large errors more heavily than MAE; its
# units are the target's units squared.
metrics.mean_squared_error(y_true=y_test, y_pred=prediction)

In [None]:
# Root mean squared error: square root of the MSE, which brings the metric
# back to the target's own units.
np.sqrt(
    metrics.mean_squared_error(y_true=y_test, y_pred=prediction)
)