In [1]:
# Import the modules
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
%matplotlib inline

---

In [2]:
# Read the input CSV (path is relative to the current working directory)
# into a Pandas DataFrame.
# Other inputs previously used with this notebook: "CTG3.csv",
# "data_preprocessed.csv" -- change the path below to switch datasets.
file_path = Path("data_real_world.csv")

df = pd.read_csv(file_path)

# Review the DataFrame
df.head()

Unnamed: 0,LB,AC,FM,UC,DL,DS,DP,ASTV,MSTV,ALTV,MLTV,NSP
0,148,0,0,1,0,0,0,38,0.9,17,12.2,2
1,148,2,0,8,0,0,0,42,0.7,12,7.1,1
2,148,2,0,3,0,0,0,37,0.8,0,7.1,1
3,148,0,0,10,0,0,0,44,0.6,16,7.3,2
4,148,0,0,9,0,0,0,44,0.6,19,6.9,2


In [3]:
# Rows whose raw NSP label equals 1; .info() reports how many there are.
# NOTE(review): presumably 1 is the "normal" class -- confirm against the data dictionary.
df1 = df[df["NSP"].eq(1)]
df1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 21 entries, 1 to 29
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LB      21 non-null     int64  
 1   AC      21 non-null     int64  
 2   FM      21 non-null     int64  
 3   UC      21 non-null     int64  
 4   DL      21 non-null     int64  
 5   DS      21 non-null     int64  
 6   DP      21 non-null     int64  
 7   ASTV    21 non-null     int64  
 8   MSTV    21 non-null     float64
 9   ALTV    21 non-null     int64  
 10  MLTV    21 non-null     float64
 11  NSP     21 non-null     int64  
dtypes: float64(2), int64(10)
memory usage: 2.1 KB


In [4]:
# Rows whose raw NSP label equals 2; .info() reports how many there are.
# NOTE(review): presumably 2 is the "suspect" class -- confirm against the data dictionary.
df2 = df[df["NSP"].eq(2)]
df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6 entries, 0 to 20
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LB      6 non-null      int64  
 1   AC      6 non-null      int64  
 2   FM      6 non-null      int64  
 3   UC      6 non-null      int64  
 4   DL      6 non-null      int64  
 5   DS      6 non-null      int64  
 6   DP      6 non-null      int64  
 7   ASTV    6 non-null      int64  
 8   MSTV    6 non-null      float64
 9   ALTV    6 non-null      int64  
 10  MLTV    6 non-null      float64
 11  NSP     6 non-null      int64  
dtypes: float64(2), int64(10)
memory usage: 624.0 bytes


In [5]:
# Rows whose raw NSP label equals 3; .info() reports how many there are.
# NOTE(review): presumably 3 is the "pathological" class -- confirm against the data dictionary.
df3 = df[df["NSP"].eq(3)]
df3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3 entries, 19 to 22
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LB      3 non-null      int64  
 1   AC      3 non-null      int64  
 2   FM      3 non-null      int64  
 3   UC      3 non-null      int64  
 4   DL      3 non-null      int64  
 5   DS      3 non-null      int64  
 6   DP      3 non-null      int64  
 7   ASTV    3 non-null      int64  
 8   MSTV    3 non-null      float64
 9   ALTV    3 non-null      int64  
 10  MLTV    3 non-null      float64
 11  NSP     3 non-null      int64  
dtypes: float64(2), int64(10)
memory usage: 312.0 bytes


In [6]:
def susp_to_path(NSP):
    """Collapse an NSP label into a binary flag.

    Parameters
    ----------
    NSP : scalar
        A single label value taken from the ``NSP`` column.

    Returns
    -------
    int
        1 when ``NSP`` equals 2 or 3, otherwise 0.

    NOTE(review): judging by the function name, 2 is "suspect" and 3 is
    "pathological", and both are merged into one positive class -- confirm.
    """
    return 1 if NSP in (2, 3) else 0

# Collapse NSP to a binary label with susp_to_path (2 or 3 -> 1, anything else -> 0).
# The remap overwrites df["NSP"] in place, so running this cell a second time
# would feed the freshly written 1s back through the mapping and turn every
# label into 0. A flag stored in df.attrs makes a re-run a no-op; re-reading
# the CSV produces a new frame without the flag, so the remap is applied again.
if not df.attrs.get("nsp_binarized", False):
    df["NSP"] = df["NSP"].apply(susp_to_path)
    df.attrs["nsp_binarized"] = True

df.head()

Unnamed: 0,LB,AC,FM,UC,DL,DS,DP,ASTV,MSTV,ALTV,MLTV,NSP
0,148,0,0,1,0,0,0,38,0.9,17,12.2,1
1,148,2,0,8,0,0,0,42,0.7,12,7.1,0
2,148,2,0,3,0,0,0,37,0.8,0,7.1,0
3,148,0,0,10,0,0,0,44,0.6,16,7.3,1
4,148,0,0,9,0,0,0,44,0.6,19,6.9,1


In [7]:
# Rows labelled 0 after the binary remap (every original label other than 2 or 3).
df0 = df[df["NSP"].eq(0)]
df0.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 21 entries, 1 to 29
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LB      21 non-null     int64  
 1   AC      21 non-null     int64  
 2   FM      21 non-null     int64  
 3   UC      21 non-null     int64  
 4   DL      21 non-null     int64  
 5   DS      21 non-null     int64  
 6   DP      21 non-null     int64  
 7   ASTV    21 non-null     int64  
 8   MSTV    21 non-null     float64
 9   ALTV    21 non-null     int64  
 10  MLTV    21 non-null     float64
 11  NSP     21 non-null     int64  
dtypes: float64(2), int64(10)
memory usage: 2.1 KB


In [8]:
# Rebinds df1: rows labelled 1 after the binary remap (original labels 2 and 3).
df1 = df[df["NSP"].eq(1)]
df1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 9 entries, 0 to 22
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LB      9 non-null      int64  
 1   AC      9 non-null      int64  
 2   FM      9 non-null      int64  
 3   UC      9 non-null      int64  
 4   DL      9 non-null      int64  
 5   DS      9 non-null      int64  
 6   DP      9 non-null      int64  
 7   ASTV    9 non-null      int64  
 8   MSTV    9 non-null      float64
 9   ALTV    9 non-null      int64  
 10  MLTV    9 non-null      float64
 11  NSP     9 non-null      int64  
dtypes: float64(2), int64(10)
memory usage: 936.0 bytes


In [9]:
# Rebinds df2 as a post-remap check: NSP now holds only 0/1, so selecting
# label 2 is expected to return an empty frame.
df2 = df[df["NSP"].eq(2)]
df2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 0 entries
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LB      0 non-null      int64  
 1   AC      0 non-null      int64  
 2   FM      0 non-null      int64  
 3   UC      0 non-null      int64  
 4   DL      0 non-null      int64  
 5   DS      0 non-null      int64  
 6   DP      0 non-null      int64  
 7   ASTV    0 non-null      int64  
 8   MSTV    0 non-null      float64
 9   ALTV    0 non-null      int64  
 10  MLTV    0 non-null      float64
 11  NSP     0 non-null      int64  
dtypes: float64(2), int64(10)
memory usage: 0.0 bytes


In [10]:
# Rebinds df3 as a post-remap check: NSP now holds only 0/1, so selecting
# label 3 is expected to return an empty frame.
df3 = df[df["NSP"].eq(3)]
df3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 0 entries
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   LB      0 non-null      int64  
 1   AC      0 non-null      int64  
 2   FM      0 non-null      int64  
 3   UC      0 non-null      int64  
 4   DL      0 non-null      int64  
 5   DS      0 non-null      int64  
 6   DP      0 non-null      int64  
 7   ASTV    0 non-null      int64  
 8   MSTV    0 non-null      float64
 9   ALTV    0 non-null      int64  
 10  MLTV    0 non-null      float64
 11  NSP     0 non-null      int64  
dtypes: float64(2), int64(10)
memory usage: 0.0 bytes


In [11]:
# Write the frame to CSV without the index column.
# An earlier variant of this cell wrote to 'data_normal_path.csv' instead.
df.to_csv(path_or_buf='data_real_world_np.csv', index=False)