In [None]:
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
# Silence only warnings whose message starts with "X does not have valid
# feature names"; every other warning is still shown.
# NOTE(review): presumably needed because estimators are fitted on a DataFrame
# and later called with plain lists/arrays -- confirm.
warnings.filterwarnings("ignore", message="X does not have valid feature names")

In [None]:
# Load the dataset; the path is relative, so the CSV must sit in the
# notebook's working directory.
df = pd.read_csv('weight-height.csv')
# Preview the first rows as the cell output.
df.head()

In [None]:
# Scatter of Weight (x axis) against Height (y axis), drawn through the
# explicit Figure/Axes interface.
fig, ax = plt.subplots()
ax.scatter(df['Weight'], df['Height'])
ax.set_xlabel("Weight")
ax.set_ylabel("Height")

In [None]:
# Draw seaborn's pairwise-relationship grid for the columns of df.
sns.pairplot(df)

In [None]:
# Remove the "Gender" column and renumber the rows from 0.
# The frame is rebound instead of mutated with inplace=True, and
# errors="ignore" makes the drop a no-op when the column is already gone,
# so re-running this cell no longer raises KeyError.
df = df.drop(columns="Gender", errors="ignore").reset_index(drop=True)

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Feature matrix: selecting with a list of labels keeps it a 2-D DataFrame.
X = df.loc[:, ['Weight']]
# Target: a single label yields a 1-D Series.
y = df.loc[:, 'Height']

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so no test-set statistics leak into training.
# NOTE: X_train / X_test are overwritten here, so re-running this cell on its
# own would refit the scaler on already-scaled data.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Ordinary least-squares fit on the scaled training data.
regression = LinearRegression(n_jobs=-1)
regression.fit(X=X_train, y=y_train)

In [None]:
# Pull the fitted parameters out of the estimator, then report them.
slope, intercept = regression.coef_, regression.intercept_
print("Coefficient or slope:", slope)
print("Intercept:", intercept)

In [None]:
# Training points with the fitted regression line on top, drawn on an
# explicit Axes object.
fig, ax = plt.subplots()
fitted_train = regression.predict(X_train)

ax.scatter(X_train, y_train, color='skyblue', label='Data Points')
ax.plot(X_train, fitted_train, color='red', linewidth=2, label='Fit Line')

ax.set_xlabel('X_train')
ax.set_ylabel('y_train')
ax.legend()

plt.show()

In [None]:
# Predictions of the fitted regression for the (scaled) test features.
y_pred = regression.predict(X_test)

In [None]:
# Test-set error metrics; RMSE is derived from MSE so it is in the same units
# as the target.
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)

for metric_label, metric_value in (
    ("Mean Squared Error: ", mse),
    ("Mean Absolute Error: ", mae),
    ("Root Mean Squared Error: ", rmse),
):
    print(metric_label, metric_value)

In [None]:
# R^2 of the predictions on the held-out test split.
r2 = r2_score(y_test, y_pred)

# Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1), where n is the number
# of observations the R^2 was computed on and p the number of predictors.
# r2 above is a test-set score, so n must be the test-set size; using the
# training-set size here would understate the penalty.
n = len(y_test)
p = X_test.shape[1]
adj_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)

print("r2_score: ", r2)
print("Adjusted r2_score: ", adj_r2)

In [None]:
# statsmodels' OLS does not include an intercept unless a constant column is
# supplied. The regressor was standardized (zero mean), so fitting without a
# constant forces the line through the origin and cannot match the sklearn
# model above. Add the constant explicitly to both splits;
# has_constant='add' guarantees the column is added so the train and test
# design matrices always have the same width.
X_train_const = sm.add_constant(X_train, has_constant='add')
X_test_const = sm.add_constant(X_test, has_constant='add')

model = sm.OLS(y_train, X_train_const).fit()
prediction = model.predict(X_test_const)
print(prediction)

In [None]:
# Show the summary report of the fitted statsmodels OLS result.
model.summary()

In [None]:
# Prediction for new data: a single observation with feature value 172 is
# passed through the already-fitted scaler before it reaches the model.
new_sample = [[172]]
new_sample_scaled = scaler.transform(new_sample)
regression.predict(new_sample_scaled)