# visualisations


In [6]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler

# Four synthetic indicators drawn uniformly from very different ranges,
# plotted before and after min-max normalization.
# A seeded Generator keeps the describe() table and both figures identical
# across "Restart Kernel -> Run All".
rng = np.random.default_rng(42)
n_samples = 100

data = {
    "speed": rng.uniform(low=0, high=300, size=n_samples),
    "pressure": rng.uniform(low=0, high=10000, size=n_samples),
    "temperature": rng.uniform(low=0, high=150, size=n_samples),
    "consumption": rng.uniform(low=0, high=50, size=n_samples),
}

df = pd.DataFrame(data)
print(df.describe())

# Raw values: one line per indicator, x is simply the row position.
fig_before = go.Figure()

for column in df.columns:
    fig_before.add_trace(go.Scatter(x=np.arange(len(df)), y=df[column], mode='lines', name=column))

fig_before.update_layout(
    title='Data Before Normalization',
    xaxis_title="Index",
    yaxis_title="Value",
    legend_title="Indicators"
)

# MinMaxScaler rescales each column independently; fit_transform returns a
# plain ndarray, so the column labels are re-attached explicitly.
scaler = MinMaxScaler()
df_normalized = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)

# Same plot on the rescaled data so the two figures can be compared directly.
fig_after = go.Figure()

for column in df_normalized.columns:
    fig_after.add_trace(go.Scatter(x=np.arange(len(df_normalized)), y=df_normalized[column], mode='lines', name=column))

fig_after.update_layout(
    title='Data After Normalization',
    xaxis_title="Index",
    yaxis_title="Normalized Value",
    legend_title="Indicators"
)

fig_before.show()
fig_after.show()

            speed     pressure  temperature  consumption
count  100.000000   100.000000   100.000000   100.000000
mean   147.073756  5035.057125    66.892741    24.640215
std     81.937320  2815.426430    42.056487    14.727065
min      5.893655     5.198093     1.058301     0.473743
25%     71.664703  2589.667628    31.531531    12.078910
50%    155.598875  5055.291553    58.702014    22.148329
75%    209.919886  7340.625468   104.508116    36.957716
max    296.167515  9961.897413   145.771702    49.145197


# MinMax Scaler

In [5]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler

# Small two-column example: "speed" contains one value (983) far above the
# rest, while "pressure" stays between 0 and 3.
data = {
    "speed": [30, 90, 10, 40, 100, 983],
    "pressure": [3, 2, 3, 1, 0, 1]
}
df = pd.DataFrame(data)

# Figure 1: the raw columns, one line each, against the row position.
fig_before = go.Figure()
fig_before.add_traces([
    go.Scatter(x=np.arange(len(df)), y=df[column], mode='lines', name=column)
    for column in df.columns
])
fig_before.update_layout(
    title='Data Before Normalization',
    xaxis_title="Index",
    yaxis_title="Value",
    legend_title="Indicators"
)

# Learn the per-column min/max, then rescale the same frame with them.
min_max_scaler = MinMaxScaler()
min_max_scaler.fit(df)
min_max_scaled_data = min_max_scaler.transform(df)

# The scaler hands back a bare array; wrap it with the original column names.
min_max_scaled_df = pd.DataFrame(min_max_scaled_data, columns=df.columns)

# Figure 2: identical layout, drawn from the rescaled frame.
fig_after = go.Figure()
fig_after.add_traces([
    go.Scatter(
        x=np.arange(len(min_max_scaled_df)),
        y=min_max_scaled_df[column],
        mode='lines',
        name=column,
    )
    for column in min_max_scaled_df.columns
])
fig_after.update_layout(
    title='Data After Normalization',
    xaxis_title="Index",
    yaxis_title="Normalized Value",
    legend_title="Indicators"
)

fig_before.show()
fig_after.show()

# Standardization (Z-score normalization)

In [9]:
import numpy as np
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler

# Simulated grades: normal with mean 50 and std 20, then clipped into 0..100.
# The Generator is seeded so the histogram is the same on every run.
rng = np.random.default_rng(42)
grades = rng.normal(loc=50, scale=20, size=1000)
grades = np.clip(grades, 0, 100)

# StandardScaler expects a 2-D (n_samples, n_features) array, hence the
# reshape to a single column and the flatten back to 1-D afterwards.
scaler = StandardScaler()
grades_standardized = scaler.fit_transform(grades.reshape(-1, 1)).flatten()

# Both distributions share one figure; partial opacity keeps the trace drawn
# underneath visible in "overlay" mode.
fig = go.Figure()
fig.add_trace(go.Histogram(x=grades, name="Original", opacity=0.6))
fig.add_trace(go.Histogram(x=grades_standardized, name="Standardized", opacity=0.6))

# go.Histogram plots bin counts unless `histnorm` is set, so the y-axis is
# labelled as a count rather than a probability density.
fig.update_layout(
    barmode="overlay",
    title_text="Distribution of Grades Before and After Standardization",
    xaxis_title="Grades",
    yaxis_title="Count"
)

fig.show()

# Previous Task

In [15]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import tensorflow as tf
from tensorflow.keras import layers, models
import plotly.express as px
import joblib

# One seed for Python, NumPy and TensorFlow so the train/test split and the
# network's initial weights are repeatable across "Restart & Run All".
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# --- Load and inspect the raw data -----------------------------------------
print("Original DataFrame:")
df = pd.read_csv('alarms_no_scalse.csv')
fig = px.scatter_3d(df, x='sound', y='visibility', z='alarm', color='alarm', color_continuous_scale=["#FF2525", "#FFE53B"])
fig.show()

print("Original DataFrame:")
print(df)

# --- Min-max scale the feature columns --------------------------------------
columns_to_scale = ['sound', 'distance', 'visibility']
min_max_scaler = MinMaxScaler()

# Scale into a copy so `df` keeps the raw values; the target column "alarm"
# is carried over unchanged.
# NOTE(review): the scaler is fit on the full dataset before the train/test
# split, so test rows influence the learned min/max. Fit on the training rows
# only if a strictly held-out evaluation is required.
min_max_scaled_df = df.copy()
min_max_scaled_df[columns_to_scale] = min_max_scaler.fit_transform(df[columns_to_scale])

# Persist the fitted scaler so new samples can be transformed the same way
# at prediction time.
joblib.dump(min_max_scaler, 'min_max_scaler.pkl')

print("\nMin-Max Scaled DataFrame:")
print(min_max_scaled_df)
fig = px.scatter_3d(min_max_scaled_df, x='sound', y='visibility', z='alarm', color='alarm', color_continuous_scale=["#784BA0", "#FF3CAC"])
fig.show()

# --- Train a small regression network ---------------------------------------
X = min_max_scaled_df[columns_to_scale].values
Y = min_max_scaled_df["alarm"].values

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=SEED)

# An explicit Input layer declares the feature count instead of passing
# `input_shape` to the first Dense layer.
model = models.Sequential([
    layers.Input(shape=(x_train.shape[1],)),
    layers.Dense(8, activation="relu"),
    layers.Dense(2, activation="sigmoid"),
    layers.Dense(1, activation="sigmoid"),
])

model.compile(optimizer="adam", loss="mean_squared_error", metrics=["mse"])

# The held-out test split doubles as the validation set during training.
model.fit(x_train, y_train, epochs=50, batch_size=32, validation_data=(x_test, y_test))

# evaluate() returns [loss, mse]; both entries coincide because the tracked
# metric is the same quantity as the loss.
loss = model.evaluate(x_test, y_test)
print("Test Loss:", loss)

Original DataFrame:


Original DataFrame:
          sound  distance  visibility     alarm
0     24.198630  6.634561      116182  0.299456
1      0.398612  9.320522       35130  0.164304
2      7.850326  7.889677      100325  0.270989
3     22.465501  7.738801       59180  0.322714
4     31.681223  7.115721       35329  0.254386
...         ...       ...         ...       ...
2995  59.130638  3.241602      242615  0.547849
2996  47.971264  2.512568      211907  0.878060
2997  55.173557  3.398722      244827  0.609202
2998  38.744595  1.539259      209321  0.658129
2999  39.214225  4.932947      226251  0.869095

[3000 rows x 4 columns]

Min-Max Scaled DataFrame:
         sound  distance  visibility     alarm
0     0.403332  0.663460    0.443148  0.299456
1     0.006643  0.932117    0.133918  0.164304
2     0.130845  0.789000    0.382650  0.270989
3     0.374445  0.773909    0.225673  0.322714
4     0.528049  0.711587    0.134677  0.254386
...        ...       ...         ...       ...
2995  0.985564  0.32408

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Test Loss: [0.01760612428188324, 0.01760612428188324]


In [14]:
import joblib
import numpy as np
import pandas as pd

# NOTE: `model` is the network trained in the previous cell and
# "min_max_scaler.pkl" is the scaler that cell saved; run it first.

# Print floats in plain decimal notation instead of scientific notation.
np.set_printoptions(suppress=True)

# New samples in raw units; columns are sound, distance, visibility.
data = np.array([
    [50, 9, 25000],
    [13, 1, 650],
    [38, 6, 1980],
    [11, 5, 300950],
    [23, 2, 5100],
    [46, 1, 980],
])

# Feeding raw values to a model trained on scaled inputs, shown for
# comparison with the properly scaled predictions below.
before_scaling_predictions = model.predict(data)
print(f"Predictions before scaling: {before_scaling_predictions}")

# joblib.load unpickles the file, so only load scaler files you created.
scaler = joblib.load("min_max_scaler.pkl")

# Wrap the array with the training column names so the scaler receives the
# same feature labels it was fitted with.
new_data_w_features = pd.DataFrame(data, columns=["sound", "distance", "visibility"])
new_data_scaled = scaler.transform(new_data_w_features)
after_scaling_predictions = model.predict(new_data_scaled)

print(f"Predictions after scaling: {after_scaling_predictions}")

Predictions before scaling: [[0.75909543]
 [0.75909543]
 [0.75909543]
 [0.75909543]
 [0.75909543]
 [0.75909543]]
Predictions after scaling: [[0.24961951]
 [0.38047683]
 [0.26600537]
 [0.67253447]
 [0.38706103]
 [0.58921194]]
