<br>
<h1 style="color:pink; text-align:center; font-size:30px; font-family:Arial Black; border-radius:30px 30px; background-color:black; line-height: 50px; padding: 15px 15px 15px 2.5%;">🚀Linear Regression🚀</h1>
<br>

# ✅ Importing Required Libraries

In [None]:
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# 📑Reading the Data

In [None]:
# Print the full path of every file found under the Kaggle input directory.
# `os` is imported in the notebook's import cell.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [None]:
# Directory holding the competition CSVs; defined once so the location can be
# changed in a single place.
DATA_DIR = '/kaggle/input/tabular-playground-series-jul-2021'

# Training data, test data, and the sample submission template.
train = pd.read_csv(f'{DATA_DIR}/train.csv')
test = pd.read_csv(f'{DATA_DIR}/test.csv')
ss = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

# 🔍Basic Data Checks

In [None]:
# Report the (rows, columns) shape of each loaded frame.
for label, frame in (('Train', train), ('Test', test), ('Sample Submission', ss)):
    print(f'Shape of {label} dataset is : {frame.shape}')

In [None]:
def check_NAN_columns(df: pd.DataFrame, df_name: str) -> None:
    """Print which columns of ``df`` contain missing values.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect.
    df_name : str
        Label inserted into the printed message.

    Returns
    -------
    None
        The report is printed, not returned.
    """
    # Scan the frame once and reuse the result for both the check and the report.
    nan_columns = df.columns[df.isnull().any()]
    if len(nan_columns) == 0:
        print(f'No missing data in {df_name} dataset')
    else:
        print(f'The following columns are having missing data in {df_name} dataset:')
        print(nan_columns)

In [None]:
# Map a display label to each loaded frame, then run the missing-value report
# on every one of them.
datasets = dict(zip(
    ('Train', 'Test', 'Sample Submission'),
    (train, test, ss),
))
for label in datasets:
    check_NAN_columns(datasets[label], label)

In [None]:
# Summary statistics of the training frame, transposed so each column of
# `train` becomes a row, with colour cues on count / mean / median / std.
train_summary = train.describe().T
(
    train_summary.style
    .background_gradient(subset=['count'], cmap='viridis')
    .bar(subset=['mean', '50%'], color='#1E90FF')
    .bar(subset=['std'], color='#DC143C')
)

In [None]:
# Summary statistics of the test frame, transposed so each column of `test`
# becomes a row, with colour cues on count / mean / median / std.
test_summary = test.describe().T
(
    test_summary.style
    .background_gradient(subset=['count'], cmap='viridis')
    .bar(subset=['mean', '50%'], color='#1E90FF')
    .bar(subset=['std'], color='#DC143C')
)

In [None]:
# Correlate numeric columns only. `train` still contains `date_time`, which is
# loaded without date parsing (presumably as text); pandas >= 2.0 no longer
# drops non-numeric columns in DataFrame.corr() and raises instead.
corr = train.select_dtypes(include='number').corr()
# Boolean mask over the upper triangle (diagonal included) so each pair of
# columns is drawn only once.
mask = np.triu(np.ones_like(corr, dtype = bool))

plt.figure(figsize = (12, 10))
plt.title('Correlation matrix')
sns.heatmap(corr, mask = mask, cmap = 'rocket_r', linewidths = .5)
plt.show()

In [None]:
# Names of the columns the models are trained to predict.
targets = [
    'target_carbon_monoxide',
    'target_benzene',
    'target_nitrogen_oxides',
]

In [None]:
# One filled KDE panel per target column, laid out side by side.
fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(18, 6))
fig.suptitle('Target Distributions', size='xx-large')

for panel, col in zip(ax, targets):
    sns.kdeplot(train[col], ax=panel, fill=True)
    panel.set_title(col)

<div class="alert alert-block alert-info">
📌 All the targets are left-skewed.
</div>

# 🎯Model Creation

In [None]:
# Features: every training column except the timestamp and the targets.
# `targets` is reused so the target names are defined in exactly one place.
X = train.drop(columns=['date_time', *targets])
# One column per target, in the order given by `targets`.
y = train[targets]
# Test features: the test frame without its timestamp column.
X_test = test.drop(columns='date_time')

In [None]:
# Fit one independent linear model per column of `y`, in column order:
# reg1 -> column 0, reg2 -> column 1, reg3 -> column 2.
reg1, reg2, reg3 = (
    LinearRegression().fit(X, y.iloc[:, col_idx])
    for col_idx in range(3)
)

# 📁 Submission file

In [None]:
# Overwrite each target column of the sample submission with the predictions
# of the model fitted for that target, then save the result as a CSV.
for target_col, model in zip(targets, (reg1, reg2, reg3)):
    ss[target_col] = model.predict(X_test)
ss.to_csv("/kaggle/working/sub_lr.csv", index=False)

# ⬆ Kindly upvote if you like this notebook.
# Thank you🙏