## Library
- `pandas`: For loading the file
- `numpy`: For numerical operations
- `joblib`: For loading the model file
- `os`: For input data to file
- `sklearn`: For do the predicting

In [60]:
import pandas as pd
import numpy as np
import joblib
import os
from sklearn.ensemble import RandomForestClassifier

## Load Data

In [61]:
file_path = '/Users/nychanthrith/data-science-project/data/Clean data/cleaned_data_January.csv'
data = pd.read_csv(file_path)

## Splitting Features and Target

In [62]:
X = data.drop(columns=['Month', 'Day', 'Time of Day', 'Target', 'Index'])  # Features
y = data['Target']

## Load Model

In [63]:
model_file_path = '/Users/nychanthrith/data-science-project/notebooks/models/random_forest_model.pkl'
rf = joblib.load(model_file_path)

## Generating New Data

In [64]:
num_samples = 30
new_month_data = pd.DataFrame()

## Creating Randomized Data Based on Feature Statistics

In [65]:
for column in X.columns:
    mean = X[column].mean()
    std = X[column].std()
    new_month_data[column] = np.random.normal(loc=mean, scale=std, size=num_samples)

## Displaying the New Data

In [66]:
print('Predicting the target for the new month data...')
print(new_month_data)

Predicting the target for the new month data...
          pH      Iron   Nitrate  Chloride          Lead      Zinc     Color  \
0   7.329538  0.021167  2.065913  4.977253 -1.055367e-07  0.186854  4.372696   
1   7.747258 -0.008455  2.162884  5.215863  2.931628e-07  0.065872  1.163674   
2   8.605573  0.035558  1.914790  5.213796  5.005224e-07  1.400899  4.050580   
3   7.179460  0.042511  2.186888  5.009125  5.405314e-08  1.324345  5.442422   
4   6.579694  0.033453  1.799624  4.850250 -1.615790e-07 -0.144420  2.314280   
5   6.691984  0.075635  1.575714  5.345413  3.633887e-07  1.084029  1.241913   
6   5.638839  0.059978  1.884620  5.174778  1.929610e-07  0.038647  2.682642   
7   8.176219  0.031951  2.105315  4.836622  2.088801e-07  1.581267  7.270518   
8   7.672312  0.045175  1.727286  4.992094  1.150691e-07  0.571445  2.383046   
9   7.209331 -0.048339  2.201826  5.083146  4.611712e-07  0.843877  5.665090   
10  6.488864 -0.007191  1.953339  5.276846  3.077781e-07  1.306531  3.12

## Verifying Shape and Columns of the New Data

In [67]:
print("Shape of new_month_data:", new_month_data.shape)
print("Columns of new_month_data:", new_month_data.columns)

Shape of new_month_data: (30, 19)
Columns of new_month_data: Index(['pH', 'Iron', 'Nitrate', 'Chloride', 'Lead', 'Zinc', 'Color',
       'Turbidity', 'Fluoride', 'Copper', 'Odor', 'Sulfate', 'Conductivity',
       'Chlorine', 'Manganese', 'Total Dissolved Solids', 'Source',
       'Water Temperature', 'Air Temperature'],
      dtype='object')


## Making Predictions

In [68]:
predictions = rf.predict(new_month_data)

## Adding Predictions to the Data

In [69]:
new_month_data['Predicted Target'] = predictions

## Exporting Predictions to CSV

In [70]:
folder_path = '/Users/nychanthrith/data-science-project/data/Predicted data/'
file_name = 'predicted_target_next_month.csv'

In [71]:
output_file_path = folder_path + file_name
# output.to_csv(output_file_path, index=False)
new_month_data.to_csv(output_file_path, index=False)

print(f"Predictions for the next month's target saved to: {output_file_path}")

Predictions for the next month's target saved to: /Users/nychanthrith/data-science-project/data/Predicted data/predicted_target_next_month.csv


## Display The Predicted Data

In [72]:
predicted_data

Unnamed: 0,pH,Iron,Nitrate,Chloride,Lead,Zinc,Color,Turbidity,Fluoride,Copper,Odor,Sulfate,Conductivity,Chlorine,Manganese,Total Dissolved Solids,Source,Water Temperature,Air Temperature,Predicted Target
0,6.17277,0.050115,1.994325,5.017845,-4.994972e-07,0.635716,1.278707,0.099157,0.675492,0.252603,-0.000316,4.914718,20.373981,1.611,0.008701,400.986692,7.372163,2.619794,37.71779,0
1,8.185389,-0.040758,2.129893,5.265945,2.51684e-08,1.319454,1.400266,0.156529,0.89078,0.278848,2.489559,5.041375,19.462671,1.753483,-0.005637,57.964403,-2.034349,2.677878,84.730976,0
2,8.542531,0.025468,2.252521,4.815399,-2.089911e-07,0.54354,2.070284,-0.064624,0.655678,0.287259,2.408626,5.391995,16.404845,2.329143,0.019376,209.819785,1.289127,3.967433,78.385414,1
3,8.027166,0.027773,1.9685,4.907549,5.732093e-07,0.780615,3.667299,0.078989,0.74739,0.160093,3.080259,5.111376,18.512175,1.82682,0.026701,355.421751,5.047142,2.931554,69.444371,0
4,7.869796,0.067055,1.852866,4.942604,-2.962675e-07,-0.035825,3.966049,0.109183,0.318674,0.320224,2.212657,4.304243,21.645689,2.019997,0.017855,70.654692,4.290045,2.195231,10.378782,0
5,7.687248,0.01832,1.99389,5.781921,-7.161772e-09,0.257288,1.650163,0.138154,0.407795,0.542315,2.386122,4.773189,18.72567,1.898175,0.005094,220.656202,4.715581,3.07181,46.896784,1
6,10.060557,-0.055163,1.682824,5.466781,-5.250373e-07,1.180413,4.879849,0.035165,0.755269,0.306328,1.219383,5.261522,21.231607,1.789674,0.029904,281.714572,5.872666,2.393224,42.799413,0
7,8.183759,0.04797,1.733823,5.119529,-1.912436e-07,0.380486,1.31807,0.224618,0.894833,0.295133,1.646735,5.241092,25.760437,1.8834,0.013119,229.164281,0.501719,3.053866,66.832112,0
8,9.20968,-0.037171,2.137397,4.512147,-5.140652e-07,0.566464,6.51496,0.25892,-0.050884,0.329331,2.166568,4.906521,23.439245,1.536864,-0.007586,20.978795,3.114487,2.550955,74.162086,0
9,7.859326,0.003642,2.188217,4.711001,4.285103e-07,0.615519,1.854264,0.195604,0.885716,0.224465,2.606576,4.735712,21.701756,1.914488,0.022719,276.022645,8.502138,3.186996,63.193734,0
