In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Read the SOCR height/weight CSV via a path relative to the working directory.
df = pd.read_csv(filepath_or_buffer='../data/SOCR-HeightWeight.csv')

# Show the leading rows as a quick sanity check on the load.
df.head(n=5)


In [None]:
# Dimensions of the loaded DataFrame, displayed as a (rows, columns) tuple.
df.shape

In [None]:
# Print a per-column summary of the DataFrame (dtypes and non-null counts)
# to confirm the columns were parsed with the expected types.
df.info()

In [None]:
# Descriptive statistics for the DataFrame's columns, shown as the cell output.
df.describe()

In [None]:
# Count the missing entries in each column.
df.isna().sum()

In [None]:
# Exploratory scatter plot (to see the relationship between height and weight).
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(data=df, x='Height(Inches)', y='Weight(Pounds)', ax=ax)
ax.set_title('Height vs Weight')
ax.set_xlabel('Height (inches)')
ax.set_ylabel('Weight (pounds)')
plt.show()

In [None]:
# Target vector and single-column feature frame. The double brackets keep X
# as a DataFrame rather than a Series.
y = df['Weight(Pounds)']
X = df[['Height(Inches)']]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the estimator and fit it on the training portion in one step.
model = LinearRegression().fit(X_train, y_train)

# Leave the fitted estimator as the last expression so the cell still displays it.
model

In [None]:
# Predict weights for the held-out heights.
y_pred = model.predict(X_test)

# Score the predictions against the held-out targets.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Report both metrics.
print("Mean Squared Error:", mse)
print("R² Score:", r2)

In [None]:
# Overlay the fitted regression line on the held-out observations.
fig, ax = plt.subplots(figsize=(8, 5))

# Actual test-set points.
sns.scatterplot(x=X_test['Height(Inches)'], y=y_test, label='Actual', ax=ax)

# Model predictions for the same heights. lineplot's default estimator would
# average y per x value and bootstrap a confidence interval; the predictions
# are a deterministic function of x, so estimator=None draws them as-is and
# skips that aggregation. The line is still sorted by x (lineplot's default).
sns.lineplot(
    x=X_test['Height(Inches)'],
    y=y_pred,
    estimator=None,
    color='red',
    label='Predicted',
    ax=ax,
)

ax.set_title('Linear Regression: Height vs Weight')
ax.set_xlabel('Height(Inches)')
ax.set_ylabel('Weight(Pounds)')
ax.legend()
plt.show()