# Poisson is our savior

https://www.searchenginejournal.com/python-seo-forecasting/420237/#close


# Points to Remember 
- Quantitative forecasting
- Reorder point = (# units used daily x # days lead time) + # units safety stock


```Sales velocity (SV) = [(# of leads) x (average deal value) x (% conversion rate)] / (sales cycle length)```
For example, imagine a scenario where your company has 20 opportunities to sell a product. The percent of these opportunities that usually get turned into a sale is about 50%. The average deal size is `$5,000`, with a sales cycle length of three months. Sales velocity is `[(20 x .50 x $5,000)/90 days] = $555.56`. This number means that the product is bringing in about $556 per day in revenue.


# Inventory Forecasting Formulas
- EOQ = √2DS/H, where
    - D = Demand in units per year
    - S = Order cost per purchase
    - H = Holding cost per unit, per year
- Reorder point = (# units used daily x # days lead time) + (# units safety stock)
- Average inventory = (Beginning inventory + ending inventory) / 2
- Inventory Turnover Ratio = COGS / average inventory
    - How many times has your company sold and replenished its inventory over the last year? The inventory turnover ratio helps you see how many days it will take to sell the inventory you have on hand. A higher ratio points to strong sales.
    - COGS = sum of all direct costs of producing goods, including raw materials as well as your average inventory
- Safety stock
    - Safety stock = (Maximum number of units sold in a day X maximum lead time for stock replenishment) — (average daily usage X average lead time in days)
- Gross Margin Return on Invested Inventory (GMROI)
    - GMROI = Gross profit margin / average cost of inventory on hand
    - Retail businesses use this formula to see how well they are turning inventory into profits.


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error
from pandas_profiling import ProfileReport
import seaborn as sns
from sklearn.model_selection import train_test_split
import pickle

In [3]:
df = pd.read_csv('neat.csv')
pf = ProfileReport(df,title="Profile")

# Cleaned Data by considering Booked_date has correct year
- Copied the Booked Date's year to the fiscal quarter and month

In [4]:
df.head(5)

Unnamed: 0,Business Unit,Product Family,PLID,Fiscal Quarter,Fiscal Month,Booked_Qty,Booking_Date,Quarter,Month,Year
0,SRSBU,A9K1000,A9K1000-ESP100,Q2 FY2013,JAN FY2013,63,01-01-2013,Q2,JAN,2013
1,TOASTBU,SFP10G,SFP-10G-LR,Q2 FY2013,JAN FY2013,18921,01-01-2013,Q2,JAN,2013
2,SRSBU,A9K1000,A9K1000-ESP100,Q3 FY2013,FEB FY2013,31,01-02-2013,Q3,FEB,2013
3,TOASTBU,SFP10G,SFP-10G-LR,Q3 FY2013,FEB FY2013,12810,01-02-2013,Q3,FEB,2013
4,SRSBU,A9K1000,A9K1000-ESP100,Q3 FY2013,MAR FY2013,61,01-03-2013,Q3,MAR,2013


In [5]:
# pf

# Creating Quarter Column

In [6]:
quarter_list = df['Fiscal Quarter']
quarter_list = np.array(quarter_list)
index = 0
for i in quarter_list:
    q_y = i.split(' ')
    quarter_list[index] = q_y[0]
    index += 1
print(quarter_list)

['Q2' 'Q2' 'Q3' ... 'Q2' 'Q2' 'Q2']


In [7]:
# # # CHeck if it quartereed properly or not
# for k in quarter_list:
#     if(len(k)>2):
#         print(k)

In [8]:
df['Quarter'] = quarter_list

In [9]:
df

Unnamed: 0,Business Unit,Product Family,PLID,Fiscal Quarter,Fiscal Month,Booked_Qty,Booking_Date,Quarter,Month,Year
0,SRSBU,A9K1000,A9K1000-ESP100,Q2 FY2013,JAN FY2013,63,01-01-2013,Q2,JAN,2013
1,TOASTBU,SFP10G,SFP-10G-LR,Q2 FY2013,JAN FY2013,18921,01-01-2013,Q2,JAN,2013
2,SRSBU,A9K1000,A9K1000-ESP100,Q3 FY2013,FEB FY2013,31,01-02-2013,Q3,FEB,2013
3,TOASTBU,SFP10G,SFP-10G-LR,Q3 FY2013,FEB FY2013,12810,01-02-2013,Q3,FEB,2013
4,SRSBU,A9K1000,A9K1000-ESP100,Q3 FY2013,MAR FY2013,61,01-03-2013,Q3,MAR,2013
...,...,...,...,...,...,...,...,...,...,...
10991,RTOBU,C9120AX,C9120AXE-B,Q2 FY2022,DEC FY2022,19845,01-12-2022,Q2,DEC,2022
10992,RTOBU,C9120AX,C9120AXI-B,Q2 FY2022,DEC FY2022,88838,01-12-2022,Q2,DEC,2022
10993,RTOBU,C9130AX,C9130AXE-B,Q2 FY2022,DEC FY2022,3214,01-12-2022,Q2,DEC,2022
10994,RTOBU,C9130AX,C9130AXI-B,Q2 FY2022,DEC FY2022,54344,01-12-2022,Q2,DEC,2022


# Creating Month Column

In [10]:
month_list = df['Fiscal Month']
month_list = np.array(month_list)
index = 0
for i in month_list:
    q_y = i.split(' ')[0]
    month_list[index] = q_y;
    index += 1
print(month_list)

['JAN' 'JAN' 'FEB' ... 'DEC' 'DEC' 'DEC']


In [11]:
# CHECK if Month is monthed properly
# for k in month_list:
#     if(len(k)>3):
#         print(k)

In [12]:
df['Month'] = month_list
df

Unnamed: 0,Business Unit,Product Family,PLID,Fiscal Quarter,Fiscal Month,Booked_Qty,Booking_Date,Quarter,Month,Year
0,SRSBU,A9K1000,A9K1000-ESP100,Q2 FY2013,JAN FY2013,63,01-01-2013,Q2,JAN,2013
1,TOASTBU,SFP10G,SFP-10G-LR,Q2 FY2013,JAN FY2013,18921,01-01-2013,Q2,JAN,2013
2,SRSBU,A9K1000,A9K1000-ESP100,Q3 FY2013,FEB FY2013,31,01-02-2013,Q3,FEB,2013
3,TOASTBU,SFP10G,SFP-10G-LR,Q3 FY2013,FEB FY2013,12810,01-02-2013,Q3,FEB,2013
4,SRSBU,A9K1000,A9K1000-ESP100,Q3 FY2013,MAR FY2013,61,01-03-2013,Q3,MAR,2013
...,...,...,...,...,...,...,...,...,...,...
10991,RTOBU,C9120AX,C9120AXE-B,Q2 FY2022,DEC FY2022,19845,01-12-2022,Q2,DEC,2022
10992,RTOBU,C9120AX,C9120AXI-B,Q2 FY2022,DEC FY2022,88838,01-12-2022,Q2,DEC,2022
10993,RTOBU,C9130AX,C9130AXE-B,Q2 FY2022,DEC FY2022,3214,01-12-2022,Q2,DEC,2022
10994,RTOBU,C9130AX,C9130AXI-B,Q2 FY2022,DEC FY2022,54344,01-12-2022,Q2,DEC,2022


# Creating Year Column

In [13]:
year_list = df['Fiscal Month']
year_list = np.array(year_list)
index = 0
for i in year_list:
    q_y = i.split(' ')
    year_list[index] = int(q_y[1].split('Y')[1])
    index += 1
print(year_list)

[2013 2013 2013 ... 2022 2022 2022]


In [14]:
# # CHECK if Month is monthed properly
# for k in year_list:
#     if(k<2002):
#         print(k)

In [15]:
df['Year'] = year_list
df

Unnamed: 0,Business Unit,Product Family,PLID,Fiscal Quarter,Fiscal Month,Booked_Qty,Booking_Date,Quarter,Month,Year
0,SRSBU,A9K1000,A9K1000-ESP100,Q2 FY2013,JAN FY2013,63,01-01-2013,Q2,JAN,2013
1,TOASTBU,SFP10G,SFP-10G-LR,Q2 FY2013,JAN FY2013,18921,01-01-2013,Q2,JAN,2013
2,SRSBU,A9K1000,A9K1000-ESP100,Q3 FY2013,FEB FY2013,31,01-02-2013,Q3,FEB,2013
3,TOASTBU,SFP10G,SFP-10G-LR,Q3 FY2013,FEB FY2013,12810,01-02-2013,Q3,FEB,2013
4,SRSBU,A9K1000,A9K1000-ESP100,Q3 FY2013,MAR FY2013,61,01-03-2013,Q3,MAR,2013
...,...,...,...,...,...,...,...,...,...,...
10991,RTOBU,C9120AX,C9120AXE-B,Q2 FY2022,DEC FY2022,19845,01-12-2022,Q2,DEC,2022
10992,RTOBU,C9120AX,C9120AXI-B,Q2 FY2022,DEC FY2022,88838,01-12-2022,Q2,DEC,2022
10993,RTOBU,C9130AX,C9130AXE-B,Q2 FY2022,DEC FY2022,3214,01-12-2022,Q2,DEC,2022
10994,RTOBU,C9130AX,C9130AXI-B,Q2 FY2022,DEC FY2022,54344,01-12-2022,Q2,DEC,2022


In [16]:
# df.to_csv('neat.csv',index=False)

# Trying to find Trends and patterns in **Data**

In [17]:
df_dp = df.drop(['Business Unit','Product Family','Fiscal Quarter','Fiscal Month','Booking_Date'], axis=1)
df_dp

Unnamed: 0,PLID,Booked_Qty,Quarter,Month,Year
0,A9K1000-ESP100,63,Q2,JAN,2013
1,SFP-10G-LR,18921,Q2,JAN,2013
2,A9K1000-ESP100,31,Q3,FEB,2013
3,SFP-10G-LR,12810,Q3,FEB,2013
4,A9K1000-ESP100,61,Q3,MAR,2013
...,...,...,...,...,...
10991,C9120AXE-B,19845,Q2,DEC,2022
10992,C9120AXI-B,88838,Q2,DEC,2022
10993,C9130AXE-B,3214,Q2,DEC,2022
10994,C9130AXI-B,54344,Q2,DEC,2022


In [18]:
df_dp = df.drop(['Business Unit','Product Family','Fiscal Quarter','Fiscal Month','Booking_Date'], axis=1)
le = LabelEncoder()
df_dp['PLID'] = le.fit_transform(df_dp['PLID'])
product_dict = dict(zip(le.classes_, le.transform(le.classes_)))
print(product_dict)

{'8201-SYS': 0, '88-LC0-36FH': 1, '8800-LC-36FH': 2, '8800-LC-48H': 3, '8808-FC': 4, '8808-SYS': 5, '8812-SYS': 6, '8818-SYS': 7, 'A900-IMA-8Z': 8, 'A99-32X100GE-X-SE': 9, 'A9K-24X10GE-1G-TR': 10, 'A9K-8X100GE-TR': 11, 'A9K-920-12CZ-A': 12, 'A9K-920-12SZ-IM': 13, 'A9K-920-4SZ-D': 14, 'A9K-9901': 15, 'A9K-9903': 16, 'A9K-RSP5-SE': 17, 'A9K-RSP5-TR': 18, 'A9K-RSP880-SE': 19, 'A9K-RSP880-TR': 20, 'A9K1000-ESP100': 21, 'A9K1001-HX': 22, 'A9K1001-X': 23, 'A9K1002-HX': 24, 'C1000-24T-4G-L': 25, 'C1000-48T-4G-L': 26, 'C1111-4P': 27, 'C1111-8P': 28, 'C1111-8PLTExA': 29, 'C1116-4P': 30, 'C1121-8P': 31, 'C8200-1N-4T': 32, 'C8200L-1N-4T': 33, 'C8300-1N1S-6T': 34, 'C8300-2N2S-4T2X': 35, 'C8300-2N2S-6T': 36, 'C8500-12X': 37, 'C8500-12X4QC': 38, 'C8500L-8S4X': 39, 'C9105AXI-B': 40, 'C9105AXW-B': 41, 'C9115AXE-B': 42, 'C9115AXI-B': 43, 'C9120AXE-B': 44, 'C9120AXI-B': 45, 'C9130AXE-B': 46, 'C9130AXI-B': 47, 'C9200-24T-E': 48, 'C9200-48P-A': 49, 'C9200-48P-E': 50, 'C9200-48PXG-E': 51, 'C9200-48T-E': 52

In [19]:
X = np.array(df_dp[['PLID','Quarter','Month','Year']])
y = np.array(df_dp['Booked_Qty'])

In [20]:
# ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
# X = np.array(ct.fit_transform(X))
# ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [-2])], remainder='passthrough')
# X = np.array(ct.fit_transform(X))

In [21]:
print(j)

NameError: ignored

# Mulitple Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

In [None]:
y_pred = regressor.predict(X_test)
y_pred
np.set_printoptions(precision=2)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

print(regressor.score(X_test, y_test))

# Polynomial Regression

# Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=10, random_state=0)
regressor.fit(X_train,y_train)

In [None]:
X

In [None]:
predictions = regressor.predict(X_test)
for p in range(len(predictions)):
    print(predictions[p], X_test[p], y_test[p])

In [None]:
regressor.score(X_test,y_test)

# Neural Networks

## Feature Scaling

In [None]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train_fs = sc.fit_transform(X_train)
X_test_fs = sc.transform(X_test)

In [None]:
X_train_fs

In [None]:
ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units=5, activation='relu'))
ann.add(tf.keras.layers.Dense(units=5, activation='relu'))
ann.add(tf.keras.layers.Dense(units=1, activation='softmax'))

In [None]:
ann.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [None]:
ann.fit(X_train_fs, y_train, batch_size = 32, epochs = 100)

In [None]:
y_pred = ann.predict(X_test)
y_pred = (y_pred > 0.5)
print(np.concatenate((y_pred.reshape(len(y_pred),1), y_test.reshape(len(y_test),1)),1))

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

# XG Boost

In [None]:
df_dp

In [None]:
from xgboost import XGBRegressor
classifier = XGBRegressor()
classifier.fit(X_train, y_train)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test)
print(y_pred)
# cm = confusion_matrix(y_test, y_pred)
# print(cm)
# accuracy_score(y_test, y_pred)

In [None]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

# CatBoost

In [None]:
!pip install catboost

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [None]:
from catboost import CatBoostClassifier
classifier = CatBoostClassifier()
classifier.fit(X_train, y_train)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)
accuracy_score(y_test, y_pred)

In [None]:
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 2)
print("Accuracy: {:.2f} %".format(accuracies.mean()*100))
print("Standard Deviation: {:.2f} %".format(accuracies.std()*100))

# Trying out all

In [22]:
!pip install fancyimpute

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [23]:
from fancyimpute import IterativeImputer
from sklearn.metrics import mean_squared_error

from sklearn.svm import SVR, LinearSVR
from sklearn.linear_model import ElasticNet, Lasso, RidgeCV,LinearRegression
from sklearn.kernel_ridge import KernelRidge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor,AdaBoostRegressor,RandomForestRegressor
import xgboost as xgb
import lightgbm as lgb


In [None]:
df

In [None]:
df_dp

In [None]:
product_dict['A9K1000-ESP100']

In [None]:
# Get all product data
df_Product = df_dp[df_dp['PLID'] == 44]

# Get Independent and Dependent
X = np.array(df_Product[['PLID','Quarter','Month','Year']])
y = np.array(df_Product['Booked_Qty'])

# One Hot Encoding
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
X = np.array(ct.fit_transform(X))
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [-2])], remainder='passthrough')
X = np.array(ct.fit_transform(X))

#Train Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, random_state = 42)


classifiers = [
    LinearRegression(),
    RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]),
    Lasso(alpha =16, random_state=100),
    ElasticNet(alpha=0.8),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    AdaBoostRegressor(),
    SVR(), 
    LinearSVR(), 
    xgb.XGBRegressor(),
    lgb.LGBMRegressor()
    ]

name = []
score = []
models = []
rmse = []
i = 0
for classifier in classifiers:
    classifier.fit(X_train, y_train)   
    name.append(type(classifier).__name__)
    score.append(classifier.score(X_test, y_test))
    models.append(classifier)
    rmse.append(np.sqrt(mean_squared_error(classifier.predict(X_test), y_test)))
    from sklearn.model_selection import cross_val_score

df_score = pd.DataFrame(list(zip(name,rmse, score, models)),columns=['name','rmse','score',"model"])
df_score.set_index('name',inplace=True)
df_score.sort_values(by=['score'], ascending= False,inplace=True)
df_score

# Testing for Better Accuracy

In [24]:
quarter_ohe = {'Q1':[1,0,0,0],'Q2':[0,1,0,0],'Q3':[0,0,1,0],'Q4':[0,0,0,1]}
month_ohe = {'JAN':[0,0,0,0,0,1,0,0,0,0,0,0],'FEB':[0,0,0,0,0,0,1,0,0,0,0,0],'MAR':[0,0,0,0,0,0,0,1,0,0,0,0],'APR':[0,0,0,0,0,0,0,0,1,0,0,0],
              'MAY':[0,0,0,0,0,0,0,0,0,1,0,0],'JUN':[0,0,0,0,0,0,0,0,0,0,1,0],'JUL':[0,0,0,0,0,0,0,0,0,0,0,1],'AUG':[1,0,0,0,0,0,0,0,0,0,0,0],
              'SEP':[0,1,0,0,0,0,0,0,0,0,0,0],'OCT':[0,0,1,0,0,0,0,0,0,0,0,0],'NOV':[0,0,0,1,0,0,0,0,0,0,0,0],'DEC':[0,0,0,0,1,0,0,0,0,0,0,0]}

In [25]:
# pid = 0

# for k,v in product_dict.items():
#   if k == "A9K1001-X":
#     pid = v
#     break

def getEncode(pid):
  # Get all product data
  df_Product = df_dp[df_dp['PLID'] == pid]
  # print(df_Product)
  # Get Independent and Dependent
  X = np.array(df_Product[['PLID','Quarter','Month','Year']])
  y = np.array(df_Product['Booked_Qty'])

  Xen = []
  for row in X:
      upq = quarter_ohe[f'{row[1]}']
      upm = month_ohe[f'{row[2]}']
      encodings = (upm, upq)
      Xen.append(upm+upq+[row[0]]+[row[3]])

  Xen = np.array(Xen)

  # print(Xen)

  return(Xen,y)

def foo(p, q, m, y):
  upq = quarter_ohe[f'{q}']
  upm = month_ohe[f'{m}']
  encodings = (upm, upq)
  return(upm+upq+[p]+[y])

In [26]:
# test = foo(pid, 'Q2', 'JAN', 2023)
# print(y[0])
# print(test)

In [27]:
new_df = pd.DataFrame({"PLID":[], "MODEL":[], "ACCURACY":[]})

In [28]:
models_to_check = {}
for k,pid in product_dict.items():
  Xen,y = getEncode(pid)
  # print(Xen)
  #Train Test Split
  # break
  X_train, X_test, y_train, y_test = train_test_split(Xen, y, test_size = 0.2, random_state = 42)

  classifiers = [
      LinearRegression(),
      RidgeCV(alphas=[1e-3, 1e-2, 1e-1, 1]),
      Lasso(alpha =16, random_state=100),
      ElasticNet(alpha=0.8),
      DecisionTreeRegressor(),
      RandomForestRegressor(),
      GradientBoostingRegressor(),
      AdaBoostRegressor(),
      SVR(), 
      LinearSVR(), 
      xgb.XGBRegressor(),
      lgb.LGBMRegressor()
      ]

  name = []
  score = []
  models = []
  rmse = []
  # maximum = float('-inf');
  minimum = float('inf')
  times_to_train = 10
  model_to_pickle = None

  for _ in range(times_to_train):
    for classifier in classifiers:
        classifier.fit(X_train, y_train)   
        name.append(type(classifier).__name__)
        score.append(classifier.score(X_test, y_test))
        models.append(classifier)
        rmse.append(np.sqrt(mean_squared_error(classifier.predict(X_test), y_test)))
        # y_pred = classifier.predict([test])
        if(rmse[-1] < minimum):
          model_to_pickle = (classifier, rmse[-1])
          models_to_check[k] = (classifier, rmse[-1])
          minimum = rmse[-1]


    df_score = pd.DataFrame(list(zip(name,rmse, score, models)),columns=['name','rmse','score',"model"])
    df_score.set_index('name',inplace=True)
    df_score.sort_values(by=['score'], ascending= False,inplace=True)
  k = k.replace("/", "")
  try:
    with open(f"./pickles/{k}.pickle", "wb") as f:
      pickle.dump(model_to_pickle, f)
    # print(f"\x1b[91m CHOSEN MODEL FOR {k} IS {type(classifier).__name__} WITH AN ACCURACY OF {maximum}\x1b[0m")
    new_df.append({"PLID":k, "MODEL":type(classifier).__name__, "RMSE":minimum}, ignore_index=True)
  except:
    # print(f"\x1b[91m CHOSEN MODEL FOR {k} IS {type(classifier).__name__} WITH AN ACCURACY OF {maximum} WASN'T PICKLED\x1b[0m")
    
    model_to_pickle[0].booster_.save_model(f'{k}.txt')
    with open(f"./pickles/{k}-rmse.txt", "w") as f:
      f.write(str(model_to_pickle[1]))
# # print(df_score)
# print("&"*100)
# print(model_acc)
# print("&"*100)
# print(y)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  new_df.append({"PLID":k, "MODEL":type(classifier).__name__, "RMSE":minimum}, ignore_index=True)
  new_df.append({"PLID":k, "MODEL":type(classifier).__name__, "RMSE":minimum}, ignore_index=True)
  new_df.append({"PLID":k, "MODEL":type(classifier).__name__, "RMSE":minimum}, ignore_index=True)
  new_df.append({"PLID":k, "MODEL":type(classifier).__name__, "RMSE":minimum}, ignore_index=True)
  new_df.append({"PLID":k, "MODEL":type(classifier).__name__, "RMSE":minimum}, ignore_index=True)
  new_df.append({"PLID":k, "MODEL":type(classifier).__name__, "RMSE":minimum}, ignore_index=True)
  new_df.append({"PLID":k, "MODEL":type(classifier).__name__, "RMSE":minimum}, ignore_index=True)
  new_df.append({"PLID":k, "MODEL":type(classifier).__name__, "RMSE":minimum}, ignore_index=True)
  new_df.append({"PLID":k, "MODEL":type(classifier).__name__, "RMSE":minimum}, ignore_index=True)
  new_df.append({"PLID":k, "MODEL":type(classifier)._

In [29]:
print(models_to_check)

{'8201-SYS': (RandomForestRegressor(), 30.284876588819042), '88-LC0-36FH': (LGBMRegressor(), 38.31095361207636), '8800-LC-36FH': (DecisionTreeRegressor(), 97.6012295004525), '8800-LC-48H': (LGBMRegressor(), 304.3619448070913), '8808-FC': (XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=None, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=None, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             n_estimators=100, n_jobs=None, num_parallel_tree=None,
             predictor=None, random_state=None, ...), 885.989215566744), 

In [30]:
print(*model_to_pickle.items(), sep = '\n', end="\n"+"-"*100)

AttributeError: ignored

In [36]:
pid = "8201-SYS"#'IE-4010-16S12P'
print(models_to_check[pid][1])
models_to_check[pid][0].predict([foo(product_dict[pid], "Q1", "OCT", 2022)])

30.284876588819042


array([304.26])

In [None]:
# pickle_in = open("./pickles/learnModel.pickle", "rb")
# models = pickle.load(pickle_in)
# pickle_in.close()

with open("learnModelNew.pickle", "wb") as f:
  pickle.dump({1:model_to_pickle[pid][0], 3:4}, f)

In [None]:

pickle_in = open("./learnModelNew.pickle", "rb")
models = pickle.load(pickle_in)
pickle_in.close()

print(models)

In [32]:
!ls -1 ./pickles/ | wc -l

251


In [None]:
!pwd

In [31]:
!zip ./pickles.zip ./pickles/*

updating: pickles/8201-SYS.pickle (deflated 89%)
updating: pickles/8800-LC-36FH.pickle (deflated 67%)
updating: pickles/8800-LC-48H.pickle (deflated 54%)
updating: pickles/8808-FC.pickle (deflated 82%)
updating: pickles/8808-SYS.pickle (deflated 85%)
updating: pickles/8812-SYS.pickle (deflated 15%)
updating: pickles/8818-SYS.pickle (deflated 16%)
updating: pickles/88-LC0-36FH.pickle (deflated 55%)
updating: pickles/A900-IMA-8Z.pickle (deflated 14%)
updating: pickles/A99-32X100GE-X-SE.pickle (deflated 84%)
updating: pickles/A9K1000-ESP100.pickle (deflated 84%)
updating: pickles/A9K1001-HX.pickle (deflated 76%)
updating: pickles/A9K1001-X.pickle (deflated 83%)
updating: pickles/A9K1002-HX.pickle (deflated 85%)
updating: pickles/A9K-24X10GE-1G-TR.pickle (deflated 85%)
updating: pickles/A9K-8X100GE-TR.pickle (deflated 76%)
updating: pickles/A9K-920-12CZ-A.pickle (deflated 74%)
updating: pickles/A9K-920-12SZ-IM.pickle (deflated 29%)
updating: pickles/A9K-920-4SZ-D.pickle (deflated 13%)
upda

In [None]:
models_to_check

In [38]:
pickle_in = open("./pickles/8201-SYS.pickle", "rb")
linear = pickle.load(pickle_in)
pickle_in.close()
print(linear)

(RandomForestRegressor(), 30.284876588819042)
