In [1]:
import pandas as pd
import numpy as np
import sklearn
from matplotlib import pyplot as plt 
from sklearn.metrics import mean_squared_error, r2_score
# from hyperopt import hp, tpe, Trials, fmin
import time
import datetime
import random

# HCS T4SG x NIOSH Lab Temperature Prediction Model
### via Support Vector Regression (SVR)

In [2]:
# open and read data

path = "./LabTempTrends.csv"
df = pd.read_csv(path)
df.head(10)

Unnamed: 0,TB15,Unnamed: 1,T302,Unnamed: 3,T303,Unnamed: 5,T446,Unnamed: 7,T449,Unnamed: 9,...,H460,Unnamed: 31,H309.1,Unnamed: 33,H353,Unnamed: 35,H353.1,Unnamed: 37,TB01.1,Unnamed: 39
0,Time,Trend (deg F),Time,Trend (deg F),Time,Trend (deg F),Time,Trend (deg F),Time,Trend (deg F),...,Time,Trend (deg F),Time,HUMIDITY %. Trend (%RH),Time,HUMIDITY %. Trend (%RH),Time,Trend (deg F),Time,Trend (deg F)
1,4/27/21 1:00:00 PM EDT,74.99469,,,4/27/21 12:00:00 PM EDT,71.69919,4/27/21 1:00:00 PM EDT,72.0625,4/27/21 1:00:00 PM EDT,72.86691,...,,,,,,,,,,
2,4/27/21 2:00:00 PM EDT,74.29407,,,4/27/21 1:00:00 PM EDT,71.7511,4/27/21 2:00:00 PM EDT,72.91879,4/27/21 2:00:00 PM EDT,72.45172,...,,,,,,,,,,
3,4/27/21 3:00:00 PM EDT,74.78711,,,4/27/21 2:00:00 PM EDT,71.64728,4/27/21 3:00:00 PM EDT,72.0625,4/27/21 3:00:00 PM EDT,72.55551,...,,,,,,,,,,
4,4/27/21 4:00:00 PM EDT,74.47571,,,4/27/21 3:00:00 PM EDT,70.06445,4/27/21 4:00:00 PM EDT,72.01059,4/27/21 4:00:00 PM EDT,72.71118,...,,,,,,,,,,
5,4/27/21 5:00:00 PM EDT,74.47571,,,4/27/21 4:00:00 PM EDT,70.583405,4/27/21 5:00:00 PM EDT,72.01059,4/27/21 5:00:00 PM EDT,72.86691,...,,,,,,,,,,
6,4/27/21 6:00:00 PM EDT,74.8909,,,4/27/21 5:00:00 PM EDT,70.920746,4/27/21 6:00:00 PM EDT,73.12637,4/27/21 6:00:00 PM EDT,73.12637,...,,,,,,,,,,
7,4/27/21 7:00:00 PM EDT,75.20227,,,4/27/21 6:00:00 PM EDT,71.28403,4/27/21 7:00:00 PM EDT,73.411804,4/27/21 7:00:00 PM EDT,72.815,...,,,,,,,,,,
8,4/27/21 8:00:00 PM EDT,75.46176,,,4/27/21 7:00:00 PM EDT,71.38782,4/27/21 8:00:00 PM EDT,73.515594,4/27/21 8:00:00 PM EDT,72.24414,...,,,,,,,,,,
9,4/27/21 9:00:00 PM EDT,75.53958,,,4/27/21 8:00:00 PM EDT,71.07645,4/27/21 9:00:00 PM EDT,73.671295,4/27/21 9:00:00 PM EDT,73.12637,...,,,,,,,,,,


## Raw Data Analysis
What the data is trying to show is the temperature or humidity measurements for each lab for each time step. As we can see, there are some problems with the raw data that make it hard to process:

1. For each lab, we have the <em>x</em> column (Time) and the <em>y</em> column (Temperature or Humidity). Because of the structure of the dataframe, if you query a lab name, you will only get the column of time steps because the <em>y</em> column is under an 'Unnamed' header.

2. For some columns, such as Lab TB01, the information is very scattered and temperature is found in a different spot than the data for humidity, meaning there are multiple column headers for 'TB01'

3. Some columns and subcolumns are unlabeled or labeled differently (lab name uses 'Hamilton' instead of 'H').

## Initial Data Transformation
To address the problems above, we'll perform the following transformations on the data:
<ul>
    <li>Before opening and reading the csv file into our program, we make the following changes (if needed) to the file:
        <ul>
            <li>Make sure all lab names begin with the first letter of the building name ('T' or 'H') followed by their building number (ex: H309).</li>
            <li>Make sure all columns related to a specific lab are consolidated in one place, for example, all meausurements for TB01 are in consecutive columns such that there is only one `Time` column for  TB01.</li>
            <li>Make sure all subcolumns are named and are identical across all labs. In this case, all columns for temperature have been named 'Trend (deg F)' and all columns for humidity have been named `HUMIDITY %. Trend (%RH)`.</li>
            <li>General metrics that include building measurements (ex: Outside Temperature) should be named `T Building` or `H Building`. What matters is the key word 'Building.'</li>
            <li>Note: in order for the program to run, the structure of the data must match this exactly, including the namings for the columns and subcolumns. The transformed csv file can be found in 'Documentation'.</li>
        </ul>
    </li>
    <li>Next, we'll alter the overarching data structure `labs` to be a collection of dataframes rather than just one dataframe that contains all the information. The keys of the this dictionary will be the lab name which will point to a dataframe which contains columns for Time, Temperature, and Humidity (if applicable).</li>
    <li>Finally, to store the general metrics, we create a deparate dataframe called `TlabGeneral` to store measurements regarding 'T Building.'</li>
</ul>

In [3]:
labs = {}
TlabGeneral = pd.DataFrame(columns=[])

for col in df:
    # the structure of the data implies the info under each named column (i.e. 'TB01') is the Time column
    if 'Unnamed' not in col:
        # get the column to the right of the Time column (i.e. Temperature column)
        temp_col = df.iloc[:,df.columns.get_indexer([col])+1]
        for a in temp_col:
            next_col = temp_col[a]
        col1 = df[col][1:]
        col2 = next_col[1:]

        # if lab has already been visited, just insert new column into the existing dataframe for that lab
        if col[:4] in labs:
            labs[col[:4]][next_col[0]] = col2
        else:
            # otherwise, create a new dataframe for that lab
            df2 = pd.DataFrame(columns=[df[col][0],next_col[0]])
            df2[df[col][0]] = col1
            df2[next_col[0]] = col2

            # add to general metrics data structure
            if 'Building' in col:
                if df[col][0] not in TlabGeneral.columns:
                    TlabGeneral[df[col][0]] = col1
                TlabGeneral[next_col[0]] = col2
            # add to labs data structure
            else:
                labs[col[:4]] = df2
print(labs)

{'TB15':                         Time Trend (deg F)
1     4/27/21 1:00:00 PM EDT      74.99469
2     4/27/21 2:00:00 PM EDT      74.29407
3     4/27/21 3:00:00 PM EDT      74.78711
4     4/27/21 4:00:00 PM EDT      74.47571
5     4/27/21 5:00:00 PM EDT      74.47571
...                      ...           ...
3231             9/9/21 3:00          73.5
3232             9/9/21 4:00          73.5
3233             9/9/21 5:00          73.5
3234             9/9/21 6:00          72.7
3235             9/9/21 7:00          72.7

[3235 rows x 2 columns], 'T302':              Time Trend (deg F)
1             NaN           NaN
2             NaN           NaN
3             NaN           NaN
4             NaN           NaN
5             NaN           NaN
...           ...           ...
3231  9/9/21 3:00          72.3
3232  9/9/21 4:00          72.3
3233  9/9/21 5:00          72.3
3234  9/9/21 6:00          72.2
3235  9/9/21 7:00          72.2

[3235 rows x 2 columns], 'T303':                        

In [4]:
# drop all null values

for lab in labs.values():
    lab.dropna(inplace=True)
    
TlabGeneral.dropna(inplace=True)

### Column Adjustment and Setback

`labsetback` is a dictionary used to store the setback information for all of the labs. Each lab has three conditions for the three time periods:
<ul>
    <li>Past - 7/28/21</li>
    <li>7/29/21 - 11/9/21</li>
    <li>11/10/21 - Present</li>
</ul>

The limits of these three time period can be found and altered in the `if` statements of `prepareCols()` below. Dates are formatted in datetime format such that `datetime.time(6, 0)` indicates 6 hours and 0 minutes or 6:00, and `datetime.datetime(2021, 7, 28)` indicates 7/28/2021 in month/day/year format. In `labsetback`, there is a start time and end time for each of the time periods. For example for lab H309, the first tuple is `(datetime.time(6, 0), datetime.time(18, 0))`, meaning that for Past - 7/28/21, the setback is turned on at 18:00 (or 6:00pm) and turned off at 6:00 (or 6:00am).

In [5]:
labsetback = {
    'H309': [
        (datetime.time(6, 0), datetime.time(18, 0)), 
        (datetime.time(0, 0), datetime.time(0, 0)), 
        (datetime.time(5, 0), datetime.time(19, 0))
    ],
    'H351': [
        (datetime.time(6, 0), datetime.time(18, 0)), 
        (datetime.time(0, 0), datetime.time(0, 0)), 
        (datetime.time(5, 0), datetime.time(19, 0))
    ],
    'H353': [
        (datetime.time(6, 0), datetime.time(18, 0)), 
        (datetime.time(0, 0), datetime.time(0, 0)), 
        (datetime.time(5, 0), datetime.time(19, 0))
    ],
    'H355': [
        (datetime.time(6, 0), datetime.time(18, 0)), 
        (datetime.time(0, 0), datetime.time(0, 0)), 
        (datetime.time(5, 0), datetime.time(19, 0))
    ],
    'H424': [
        (datetime.time(6, 0), datetime.time(18, 0)), 
        (datetime.time(0, 0), datetime.time(0, 0)), 
        (datetime.time(5, 0), datetime.time(19, 0))
    ],
    'H460': [
        (datetime.time(6, 0), datetime.time(18, 0)), 
        (datetime.time(0, 0), datetime.time(0, 0)), 
        (datetime.time(5, 0), datetime.time(19, 0))
    ],
    'TB01': [
        (datetime.time(6, 0), datetime.time(19, 0)), 
        (datetime.time(0, 0), datetime.time(0, 0)), 
        (datetime.time(5, 0), datetime.time(19, 0))
    ],
    'T446': [
        (datetime.time(0, 0), datetime.time(0, 0)), 
        (datetime.time(0, 0), datetime.time(0, 0)), 
        (datetime.time(0, 0), datetime.time(0, 0))
    ],
    'T302': [
        (datetime.time(0, 0), datetime.time(0, 0)), 
        (datetime.time(0, 0), datetime.time(0, 0)), 
        (datetime.time(0, 0), datetime.time(0, 0))
    ],
    'T303': [
        (datetime.time(0, 0), datetime.time(0, 0)), 
        (datetime.time(0, 0), datetime.time(0, 0)), 
        (datetime.time(0, 0), datetime.time(0, 0))
    ],
}

#### Hour and Month Parameters
Rather than doing a time series split, we thought that the hour of the day and the month was more important than the sequential temporal order of the data. We consider the hour of the day since the setback condition and internal lab conditions often change similarly throughout the day, and we consider the month to account for seasonal cycles.

Note: `prepareCols()` takes in the lab data (dataframe) as well as its name (string). Thus, an example call would be `prepareCols(labs['TB01'], 'TB01')` where `labs['TB01']` fetches the specific dataframe from the `labs` dictionary.

In [6]:
def prepareCols(lab, labname):
    datetimes = pd.to_datetime(lab['Time'])
    lab['Hour'] = [datetime.datetime.time(d) for d in datetimes]
    lab['Month'] = [d.month for d in datetimes]
    # lab['Weekday'] = pd.to_datetime(lab['Time']).apply(lambda x: x.weekday())

    if labname in labsetback:
        setbacks = []
        setbackdict = labsetback[labname]

        for index, row in lab.iterrows():
            # first time period Past - 7/28/21
            if pd.to_datetime(row['Time']) <= datetime.datetime(2021, 7, 28):
                end, start = setbackdict[0]
                if row['Hour'] <= end or row['Hour'] >= start:
                    setbacks.append(1)
                else:
                    setbacks.append(0)
            # second time period 7/29/21 - 11/9/2021
            elif pd.to_datetime(row['Time']) <= datetime.datetime(2021, 11, 9):
                end, start = setbackdict[1]
                if row['Hour'] <= end or row['Hour'] >= start:
                    setbacks.append(1)
                else:
                    setbacks.append(0)
            # third time period 11/10/21 - Present
            else:
                end, start = setbackdict[2]
                if row['Hour'] <= end or row['Hour'] >= start:
                    setbacks.append(1)
                else:
                    setbacks.append(0)
    # if lab is not in labsetback, it means setback is never turned on
    else:
        setbacks = [0] * len(datetimes)



    lab['Setback'] = setbacks
    # lab.drop(["Weekday"], 1, inplace=True)

    for index, row in lab.iterrows():
        lab.loc[index, 'Hour'] = row['Hour'].hour

    return lab


### Adding General Metrics

We write `addGeneral()` to add Outside Temperature as a parameter in our temperature model.
<br />
Note: Measurements for Outside Temperature for the general T Building do not begin until after measurements for individual labs have already begun. To keep the data consistent in the training and testing of our model, we drop all data for the timesteps that are missing one or more of the parameters. So if Outside Temperature is not measured for the first 100 timesteps, we drop all of the data for those first 100 timesteps and do not take it into account for our model. We only consider data where all measurements - Time, Temperature, Setback, Outside Temperature - exist.

In [7]:
def addGeneral(lab, TlabGeneral):
    lab['Outside Temp'] = [np.nan] * len(list(lab['Time']))
    for index, row in lab.iterrows():
        gen_row = np.where(TlabGeneral["Time"] == row['Time'])
        if gen_row[0]:
            lab.loc[index, 'Outside Temp'] = TlabGeneral.iloc[gen_row[0][0]]['OA-T.Trend (deg F)']
    lab.dropna(inplace=True)
    print(lab)
    return lab


### Summary Lab

This dataframe contains all the data about the features for each lab. As shown in the table, these include features such as number of windows, quantitiy of hoods, floor number, and equipment systems.

In [8]:
path = "./HVACSummary.csv"
df_s = pd.read_csv(path)
df_s.head()

Unnamed: 0,LabTitle,Building,Room #,Floor,Qty of Hoods,Windows (North),Windows (West),Windows (South),Windows (East),Augmented Controls,Sensors Description,HVAC Systems Supplying Room,Exhaust/Return System,Unnamed: 13
0,TB15,T,B-15,B-1,1.0,,,,2.0,A/C-43,JCI TE-6314P-1 Temperature,AHU-8,EF-4,
1,T302,T,302,3,1.0,,2.0,,,A/C-23,JCI TE-6314P-1,AHU-6,EF-4,
2,T303,T,303,3,1.0,,,,2.0,A/C-23A,JCI TE-6314P-1,AHU-8,EF-4,
3,T446,T,446,4,1.0,,2.0,,,A/C-22,JCI TE-6314P-1,AHU-12,EF-11,
4,T449,T,449/451,4,1.0,,,,2.0,A/C-13B,JCI TE-6314P-1,AHU-15,EF-11,


In [9]:
labSummary = {}

for index, row in df_s.iterrows():
    lab_title = row[0]

    labSummary[lab_title] = pd.DataFrame([row], columns=df_s.columns)


## Prediction Model

Here, we write our SVR (support vector regression) model `predict_value()`. This function takes in the following values:

`X_train`: input parameters (hour, month, setback, outside temperature) that we train our model on
<br />
`y_train`: output (temperature) correspoding to each input that we train our model on
<br />
`X_test`: input parameters that we then have our model predict
<br />
`y_test`: actual value that we compare our prediction values to
<br />
`t_train` + `t_test`: timesteps that correspond to our data (this is to order our data sequentially when we graph it and is not considered in our model)

Rather than using a time series split, we randomly split our data into the train and test sets. Because the data is all randomized, we have the `sort_data()` function to sort it for our graph visualisations.

We've also used the built-in `svr_lin.coef_` function to identify the weights of each input parameter on the final prediction. Note: the function calculates this based on the results produced when `kernel = 'linear'`.

The `predict_value()` function will use the linear, polynomial, and rbf kernels to fit and predict the values. It will print out the weights for each feature and draw a graph of the predictions modeled against the actual values, and its return values will be the lists of predicted values that each of the linear, polynomial, and rbf models have made (Note: the data will still be randomized to match `y_test`. It is only for the graph that all the lists are sorted).

In [10]:
from sklearn.svm import SVR 

In [11]:
import plotly.express as px # for data visualization
import plotly.graph_objects as go

def predict_value(X_train, y_train, X_test, y_test, t_train, t_test):
    
    svr_lin  = SVR(kernel='linear', C=10)
    svr_poly = SVR(kernel='poly', C=10, degree=2)
    svr_rbf = SVR(kernel='rbf', C=100, gamma=0.1)
    
    # Fit regression model
    svr_lin .fit(X_train, y_train)
    svr_poly.fit(X_train, y_train)
    svr_rbf.fit(X_train, y_train)

    # Feature weights
    lin_weights = svr_lin.coef_
    print(lin_weights)

    # Plot actual values
    fig = px.scatter(df, x=t_test, y=y_test, 
                 opacity=0.8, color_discrete_sequence=['black'])

    # Make predictions
    lin = svr_lin.predict(X_test)
    rbf = svr_rbf.predict(X_test)
    poly = svr_poly.predict(X_test)

    # Sort data sequentially
    sorted_t, sorted_lin = sort_data(t_test, lin)
    sorted_t, sorted_rbf = sort_data(t_test, rbf)
    sorted_t, sorted_poly = sort_data(t_test, poly)

    # Plot predicted values with actual values
    fig.add_traces(go.Scatter(x=sorted_t, y=sorted_lin, name='Linear model', line=dict(color='green')))
    fig.add_traces(go.Scatter(x=sorted_t, y=sorted_rbf, name='RBF model', line=dict(color='red')))
    fig.add_traces(go.Scatter(x=sorted_t, y=sorted_poly, name='Polynomial model', line=dict(color='blue')))
    

    # Change chart background color
    fig.update_layout(dict(plot_bgcolor = 'white'))

    # Update axes lines
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                    zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                    showline=True, linewidth=1, linecolor='black')

    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgrey', 
                    zeroline=True, zerolinewidth=1, zerolinecolor='lightgrey', 
                    showline=True, linewidth=1, linecolor='black')

    # Update marker size
    fig.update_traces(marker=dict(size=3))

    fig.show()
    
    return lin, rbf, poly


# function to sort values sequentially
def sort_data(x, y):
    merged_list = [(x[i], y[i]) for i in range(0, len(x))]
    sorted_list = sorted(merged_list, key=lambda y: y[0])
    sorted_x, sorted_y = zip(*sorted_list)
    return list(sorted_x), list(sorted_y)




### Preparing Data for SVR Model
In `make_model_data()`, we extract and reshape our parameters to group each row into our <em>X</em> and <em>y</em> values. For each timestep, Time, Hour, Month, Setback, and Outside Temperature are the input features and Temperature (or Trend(deg F)) is the output feature we want to predict. Then, the data is randomized and split into the training and testing data in a 9:1 ratio.

In [12]:
def make_model_data(lab):

    # extract column values

    t = [float(i) for i in range(0, len(lab['Time']))]
    lab['Time'] = t

    d = lab['Trend (deg F)'].apply(pd.to_numeric)
    d = d.astype(float)
    lab['Trend (deg F)'] = d

    def get_data(df):  
        t = df['Time'].values
        # input features
        X1 = df['Hour'].values
        X2 = df['Month'].values
        X3 = df['Setback'].values
        X4 = df['Outside Temp'].values
        y = df['Trend (deg F)']

        # reshape data
        vals = np.vstack((t, X1, X2, X3, X4, y)).T
        vals.reshape(-6, 6)

        return list(vals) # Convert Series to list

    data = get_data(lab)

    # randomize data
    rand_data = random.sample(list(data), k=len(data))

    rand_time = []
    rand_hr = []
    rand_mnth = []
    rand_setback = []
    rand_outemp = []
    rand_y = []

    for time, hour, month, setback, temp, y in rand_data:
        rand_time.append(time)
        rand_hr.append(hour)
        rand_mnth.append(month)
        rand_setback.append(setback)
        rand_outemp.append(temp)
        rand_y.append(y)

    rand_X = np.vstack((rand_hr, rand_mnth, rand_setback, rand_outemp)).T
    rand_X.reshape(-4, 4)

    # train/test split
    ratio = 10
    n = int(len(rand_data) / ratio)

    X_train = rand_X[n:]
    X_test = rand_X[:n]
    y_train = rand_y[n:]
    y_test = rand_y[:n]
    t_train = rand_time[n:]
    t_test = rand_time[:n]


    return X_train, X_test, y_train, y_test, t_train, t_test



In [59]:
# call all functions to preprocess and prepare data

data = prepareCols(labs['H353'], 'H353')
data = addGeneral(data, TlabGeneral)
X_train, X_test, y_train, y_test, times_train, times_test = make_model_data(data)

               Time HUMIDITY %. Trend (%RH) Trend (deg F) Hour  Month  \
1317   6/21/21 9:00                      76          72.5    9      6   
1318  6/21/21 10:00                      76          72.6   10      6   
1319  6/21/21 11:00                      73          72.6   11      6   
1320  6/21/21 12:00                      71          72.6   12      6   
1321  6/21/21 13:00                      72          72.7   13      6   
...             ...                     ...           ...  ...    ...   
3231    9/9/21 3:00                      60          70.6    3      9   
3232    9/9/21 4:00                      59          70.6    4      9   
3233    9/9/21 5:00                      59          70.5    5      9   
3234    9/9/21 6:00                      60          70.5    6      9   
3235    9/9/21 7:00                      58          70.4    7      9   

      Setback Outside Temp  
1317        0         81.4  
1318        0         82.8  
1319        0         83.8  
1320   

Here is an example of what the final dataframe should look like:

In [60]:
data

Unnamed: 0,Time,HUMIDITY %. Trend (%RH),Trend (deg F),Hour,Month,Setback,Outside Temp
1317,0.0,76,72.5,9,6,0,81.4
1318,1.0,76,72.6,10,6,0,82.8
1319,2.0,73,72.6,11,6,0,83.8
1320,3.0,71,72.6,12,6,0,85.4
1321,4.0,72,72.7,13,6,0,85.6
...,...,...,...,...,...,...,...
3231,1914.0,60,70.6,3,9,1,64.8
3232,1915.0,59,70.6,4,9,1,63.6
3233,1916.0,59,70.5,5,9,1,63.5
3234,1917.0,60,70.5,6,9,1,62.8


### Making Predictions

When we run `predict_value()` on our data, we can see the weights have been printed corresponding to the coordinates [hour month setback outside_temperature]. This site (https://stats.stackexchange.com/questions/39243/how-does-one-interpret-svm-feature-weights) goes more in depth of how to read this vector value, but to generalize, the absolute size of a feature relative to the others gives an indication of how important it is.

In [61]:
predictions = predict_value(X_train, y_train, X_test, y_test, times_train, times_test)

[[ 0.00308754 -0.77209818 -0.3981722   0.01580724]]


In [62]:
rbf_pred, lin_pred, poly_pred = predictions

print('RBF Prediction:', rbf_pred[5])
print('Linear Prediction:', lin_pred[5])
print('Polynomial Prediction:', poly_pred[5])

print('Actual Value:', y_test[5])

RBF Prediction: 72.44663756532007
Linear Prediction: 72.6284061821133
Polynomial Prediction: 72.51311239536483
Actual Value: 72.7


### Evaluation: Root Mean Squared Error

In [63]:
from sklearn.metrics import mean_squared_error
from math import sqrt

expected = y_test

rmse_rbf = sqrt(mean_squared_error(expected, rbf_pred))
rmse_lin = sqrt(mean_squared_error(expected, lin_pred))
rmse_poly = sqrt(mean_squared_error(expected, poly_pred))

print("RBF Error:", rmse_rbf)
print("Linear Error:", rmse_lin)
print("Polynomial Error:", rmse_poly)

RBF Error: 0.5670540056556231
Linear Error: 0.35094471414900424
Polynomial Error: 0.5600089536183134


In [18]:
error = [1, 2, 3]
error[4]

IndexError: list index out of range

## Pickling

exporting the trained models to the backend of our webapp

In [None]:
print(predictions)

rbf_model, lin_model, poly_model = predictions

my_path_rbf = "./pred_svr_H353_humid_rbf.pkl"
my_path_lin = "./pred_svr_H353_humid_lin.pkl"
my_path_poly = "./pred_svr_H353_humid_poly.pkl"



(SVR(C=10, kernel='linear'), SVR(C=10, gamma=0.2), SVR(C=10, degree=2, kernel='poly'))


In [None]:
import pickle

with open(my_path_rbf, 'wb') as pickle_file:
    pickle.dump(rbf_model, pickle_file)

with open(my_path_lin, 'wb') as pickle_file:
    pickle.dump(lin_model, pickle_file)

with open(my_path_poly, 'wb') as pickle_file:
    pickle.dump(poly_model, pickle_file)

In [None]:
import pandas as pd

my_path_rbf = "./pred_svr_H353_humid_rbf.pkl"

test_rbf = pd.read_pickle(my_path_rbf)

In [None]:
print(X_test[0][2])
print(type(test_rbf))

goal = np.reshape(X_test[0][2],(1, 1))

print(goal)

1
<class 'sklearn.svm._classes.SVR'>
[[1]]


In [None]:
test_rbf.predict([[10, 4, 0]])

array([74.6400581])

In [None]:
rbf_model, lin_model, poly_model = results

my_path_rbf = "./pred_svr_H353_humid_rbf.pkl"
my_path_lin = "./pred_svr_H353_humid_lin.pkl"
my_path_poly = "./pred_svr_H353_humid_poly.pkl"



<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=bf8e464d-6614-4785-bf90-eb5b6d8a76b5' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>