# In [None]:  (Jupyter cell prompt left over from the notebook export; kept as a comment so the file parses)
import pandas as pd;
# Show floats with a thousands separator and two decimals whenever pandas
# renders a frame or series.
pd.set_option("display.float_format", "{:,.2f}".format)

# Read the tab-separated data file. It has no header row, so pandas labels
# the columns with integers starting at 0.
df_raw = pd.read_csv('airfoil_self_noise.dat.txt', sep='\t', header=None)
df_raw

# Descriptive names for the positional columns, listed in file order.
column_names = [
    'Frequency',
    'AttackAngle',
    'ChordLength',
    'FreeStreamVelocity',
    'Thickness',
    'ScaledSoundPressureLevel',
]
# rename() returns a new frame; df_raw keeps its integer labels.
df = df_raw.rename(columns=dict(enumerate(column_names)))

# Quick inspection: first rows, dimensions, column dtypes and the number of
# missing values per column. These bare expressions only display a result
# when they are the last statement of a notebook cell.
df.head()
df.shape
df.dtypes
df.isnull().sum()

import matplotlib.pyplot as plt;
# One 20-bin histogram per column of df, laid out on a 2 x 3 grid.
fig = plt.figure(figsize=(20, 20))
for position, column in enumerate(df.columns, start=1):
    axis = fig.add_subplot(2, 3, position)
    df[column].hist(bins=20, ax=axis, facecolor='green')
    axis.set_title(column, color='red')
fig.tight_layout()
plt.show()

import seaborn as sb;
def _plot_feature_boxplots(frame):
    """Draw one boxplot per column at positions 1-4 of *frame* on a 2 x 2 grid.

    Column 0 and any column from position 5 onwards are not plotted.

    Args:
        frame: DataFrame with at least five columns.
    """
    fig = plt.figure(figsize=(20, 20))
    for position in range(1, 5):
        ax = fig.add_subplot(2, 2, position)
        # Pass the axes explicitly instead of relying on the "current axes".
        sb.boxplot(data=frame.iloc[:, position], ax=ax)
        ax.set_title(frame.columns[position], color='red')
    plt.show()


# Boxplots of the raw data.
_plot_feature_boxplots(df)

# Number of rows with AttackAngle above 20 (the rows dropped below).
len(df.query('AttackAngle > 20'))

# Keep only rows with AttackAngle <= 20. query() returns a new frame, so df
# itself is left untouched.
df_clean = df.query('AttackAngle <= 20')
df_clean.shape

# Same boxplots after filtering.
_plot_feature_boxplots(df_clean)


# Correlation of each selected feature with the target, collected in one
# table with columns 'Dependent Variable', 'Feature' and 'Correlation'.
#
# The values are computed on df_clean (the outlier-filtered frame) so the
# table describes the same rows that are used for modelling; the feature
# names were already taken from df_clean.
target_name = 'ScaledSoundPressureLevel'

# Columns at positions 1-4 only; 'Frequency' (position 0) is not included.
# NOTE(review): confirm that leaving 'Frequency' out is intentional.
feature_names = df_clean.columns[1:5]
target_values = df_clean[target_name]

# Build all rows first and construct the frame once: DataFrame.append was
# removed in pandas 2.0, and growing a frame row by row copies it each time.
corrdf = pd.DataFrame(
    [
        {
            'Dependent Variable': target_name,
            'Feature': name,
            'Correlation': target_values.corr(df_clean[name]),
        }
        for name in feature_names
    ],
    columns=['Dependent Variable', 'Feature', 'Correlation'],
)
corrdf



from sklearn.model_selection import train_test_split
# Split the cleaned rows 70% / 30% into training and test sets; the fixed
# random_state makes the split repeatable.
target_column = "ScaledSoundPressureLevel"
df_train, df_test = train_test_split(df_clean, train_size=0.7, random_state=44)

# Features are every column except the target. The targets are selected with
# double brackets, so they stay single-column DataFrames rather than Series.
x_train, y_train = df_train.drop(columns=target_column), df_train[[target_column]]
x_test, y_test = df_test.drop(columns=target_column), df_test[[target_column]]


from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
import sklearn.metrics as sm

# (label, scoring function) pairs, in the order they are printed.
_REGRESSION_METRICS = (
    ("Mean absolute error", sm.mean_absolute_error),
    ("Mean squared error", sm.mean_squared_error),
    ("Median absolute error", sm.median_absolute_error),
    ("Explain variance score", sm.explained_variance_score),
    ("R2 score", sm.r2_score),
)


def _print_regression_metrics(y_true, y_pred):
    """Print each metric in _REGRESSION_METRICS, rounded to two decimals.

    Args:
        y_true: observed target values.
        y_pred: predicted target values, aligned with *y_true*.
    """
    for label, scorer in _REGRESSION_METRICS:
        print(label + " =", round(scorer(y_true, y_pred), 2))


# y_train is a single-column DataFrame. RandomForestRegressor and SVR expect
# a 1-D target and emit a DataConversionWarning for a column vector, so a
# flattened copy is used for those two fits.
y_train_1d = y_train.to_numpy().ravel()


# ---------------------------------------------------------------------------
# Linear regression
# ---------------------------------------------------------------------------
# The `normalize` argument was deprecated in scikit-learn 1.0 and removed in
# 1.2. It is dropped rather than replaced by a scaler because ordinary least
# squares with an intercept gives the same predictions whether or not the
# features are rescaled.
lr = LinearRegression()
lr.fit(x_train, y_train)

# Training-set predictions and metrics.
y_pred_train = lr.predict(x_train)
y_pred_train
_print_regression_metrics(y_train, y_pred_train)

# Test-set predictions and metrics.
y_pred_test = lr.predict(x_test)
_print_regression_metrics(y_test, y_pred_test)


# ---------------------------------------------------------------------------
# Random forest (features standardised first)
# ---------------------------------------------------------------------------
# NOTE(review): no random_state is set on the forest, so its scores can vary
# from run to run even though the train/test split is seeded.
rf = make_pipeline(StandardScaler(), RandomForestRegressor())
rf.fit(x_train, y_train_1d)

y_pred_train = rf.predict(x_train)
y_pred_train
_print_regression_metrics(y_train, y_pred_train)

y_pred_test = rf.predict(x_test)
_print_regression_metrics(y_test, y_pred_test)


# ---------------------------------------------------------------------------
# Support vector regression (features standardised first)
# ---------------------------------------------------------------------------
svr = make_pipeline(StandardScaler(), SVR(C=30.0, epsilon=0.9))
svr.fit(x_train, y_train_1d)

y_pred_train = svr.predict(x_train)
_print_regression_metrics(y_train, y_pred_train)

y_pred_test = svr.predict(x_test)
_print_regression_metrics(y_test, y_pred_test)
