# Pruebas con CatBoost

In [None]:
!pip install catboost
!pip install ipywidgets
!jupyter nbextension enable --py widgetsnbextension

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from catboost import CatBoostRegressor, FeaturesData, Pool
from sklearn.metrics import log_loss

In [None]:
# Load the train and test CSV files and discard the 'Sales_Contract_No' column
# from both frames.
# The column is dropped with the `columns=` keyword: passing the axis
# positionally (`drop(label, 1)`) was deprecated in pandas 1.3 and raises a
# TypeError from pandas 2.0 onwards.
train_df = pd.read_csv("data/Train_TP2_Datos_2020-2C.csv")
train_df = train_df.drop(columns='Sales_Contract_No')
test_df = pd.read_csv("data/Test_TP2_Datos_2020-2C.csv")
test_df = test_df.drop(columns='Sales_Contract_No')

In [None]:
# Display the column labels of the training frame.
train_df.columns

Index(['ID', 'Region', 'Territory', 'Pricing, Delivery_Terms_Quote_Appr',
       'Pricing, Delivery_Terms_Approved', 'Bureaucratic_Code_0_Approval',
       'Bureaucratic_Code_0_Approved', 'Submitted_for_Approval',
       'Bureaucratic_Code', 'Account_Created_Date', 'Source ',
       'Billing_Country', 'Account_Name', 'Opportunity_Name', 'Opportunity_ID',
       'Account_Owner', 'Opportunity_Owner', 'Account_Type',
       'Opportunity_Type', 'Quote_Type', 'Delivery_Terms',
       'Opportunity_Created_Date', 'Brand', 'Product_Type', 'Size',
       'Product_Category_B', 'Price', 'Currency', 'Last_Activity',
       'Quote_Expiry_Date', 'Last_Modified_Date', 'Last_Modified_By',
       'Product_Family', 'Product_Name', 'ASP_Currency', 'ASP',
       'ASP_(converted)_Currency', 'ASP_(converted)',
       'Planned_Delivery_Start_Date', 'Planned_Delivery_End_Date', 'Month',
       'Delivery_Quarter', 'Delivery_Year', 'Actual_Delivery_Date', 'TRF',
       'Total_Amount_Currency', 'Total_Amount',
 

In [None]:
# Keep only the training rows whose "Stage" text contains "Closed".
closed_mask = train_df["Stage"].str.contains("Closed")
train_df = train_df[closed_mask]

In [None]:
# Columns taken from both files; the training subset additionally keeps "Stage".
selected_columns = ["Opportunity_ID", "Total_Taxable_Amount", "ASP", "TRF", "ASP_(converted)"]
train_data = train_df[selected_columns + ["Stage"]]
test_data = test_df[selected_columns]

In [None]:
# Encode the "Stage" label as an integer: "Closed Won" -> 1, "Closed Lost" -> 0.
# Both replacements are done in a single call and attached with .assign(),
# which returns a new DataFrame instead of writing into a slice of another
# frame (the item assignment is what triggered SettingWithCopyWarning).
# The explicit cast makes the column numeric regardless of how replace()
# infers the result dtype, and fails right here if any other label remains.
stage_codes = {"Closed Won": 1, "Closed Lost": 0}
train_data = train_data.assign(
    Stage=train_data["Stage"].replace(stage_codes).astype("int64")
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [None]:
# Collapse the row-level data to one row per Opportunity_ID.
# The same feature aggregations are applied to train and test; the training
# set additionally averages "Stage".
feature_aggregations = {
    "ASP_(converted)": ["sum", "mean"],
    "TRF": ["sum", "mean", "max"],
    "ASP": ["mean"],
    "Total_Taxable_Amount": ["mean"],
}
train_aggregations = {**feature_aggregations, "Stage": ["mean"]}

train_data = train_data.groupby(["Opportunity_ID"]).agg(train_aggregations).reset_index()
test_data = test_data.groupby(["Opportunity_ID"]).agg(feature_aggregations).reset_index()

In [None]:
# Preview the first rows of the aggregated training data.
train_data.head()

Unnamed: 0_level_0,Opportunity_ID,ASP_(converted),ASP_(converted),TRF,TRF,TRF,ASP,Total_Taxable_Amount,Stage
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,mean,sum,mean,max,mean,mean,mean
0,0,0.58817,0.58817,10,10.0,10,0.52,5272800.0,0
1,1,0.59948,0.59948,0,0.0,0,0.53,48230.0,1
2,2,0.48,0.48,0,0.0,0,0.48,83865.6,1
3,3,0.53,0.53,14,14.0,14,0.53,7421881.5,0
4,4,0.53,0.53,25,25.0,25,0.53,13357192.5,0


In [None]:
# Separo las columnas de datos del target
X, Y = train_data.iloc[:,:-1],train_data.iloc[:,-1]

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15)

In [None]:
# Train a CatBoost regressor on the training split and score it on the
# hold-out split with log loss.
# NOTE(review): since the target is scored with log loss, CatBoostClassifier
# with predict_proba may be a better fit; the model type is left unchanged
# here because other cells call model.predict() on this object.
model = CatBoostRegressor(iterations=16, learning_rate=0.28, depth=15)

model.fit(X_train, Y_train)

# log_loss expects probabilities, but a regressor's raw predictions are not
# constrained to [0, 1]; clip them so the metric is computed on valid values.
preds = np.clip(model.predict(X_test), 0.0, 1.0)

logloss = log_loss(Y_test, preds)
print("Log loss: {:.2f}".format(logloss))


0:	learn: 0.4625858	total: 996ms	remaining: 14.9s
1:	learn: 0.4388938	total: 1.89s	remaining: 13.3s
2:	learn: 0.4257538	total: 2.57s	remaining: 11.1s
3:	learn: 0.4144634	total: 3.3s	remaining: 9.9s
4:	learn: 0.4060564	total: 4s	remaining: 8.79s
5:	learn: 0.4002981	total: 4.7s	remaining: 7.83s
6:	learn: 0.3949995	total: 5.38s	remaining: 6.92s
7:	learn: 0.3913805	total: 6.07s	remaining: 6.07s
8:	learn: 0.3884970	total: 6.76s	remaining: 5.26s
9:	learn: 0.3844066	total: 7.45s	remaining: 4.47s
10:	learn: 0.3814239	total: 8.15s	remaining: 3.7s
11:	learn: 0.3766079	total: 8.84s	remaining: 2.94s
12:	learn: 0.3738970	total: 9.55s	remaining: 2.2s
13:	learn: 0.3727041	total: 10.3s	remaining: 1.47s
14:	learn: 0.3716311	total: 11s	remaining: 730ms
15:	learn: 0.3690491	total: 11.6s	remaining: 0us
Log loss: 0.47


In [None]:
# Refit the same model object on the complete training set (X, Y), then
# predict on the aggregated test set. This overwrites the `preds` variable.
model.fit(X, Y)
preds = model.predict(test_data)

0:	learn: 0.4625305	total: 826ms	remaining: 12.4s
1:	learn: 0.4388653	total: 1.54s	remaining: 10.8s
2:	learn: 0.4250072	total: 2.25s	remaining: 9.74s
3:	learn: 0.4120751	total: 2.96s	remaining: 8.89s
4:	learn: 0.4042957	total: 3.67s	remaining: 8.06s
5:	learn: 0.3996112	total: 4.37s	remaining: 7.29s
6:	learn: 0.3936613	total: 5.08s	remaining: 6.53s
7:	learn: 0.3897370	total: 5.79s	remaining: 5.79s
8:	learn: 0.3865140	total: 6.5s	remaining: 5.05s
9:	learn: 0.3832229	total: 7.21s	remaining: 4.33s
10:	learn: 0.3805184	total: 7.94s	remaining: 3.61s
11:	learn: 0.3784534	total: 8.64s	remaining: 2.88s
12:	learn: 0.3760256	total: 9.36s	remaining: 2.16s
13:	learn: 0.3750719	total: 10.1s	remaining: 1.44s
14:	learn: 0.3731411	total: 10.8s	remaining: 719ms
15:	learn: 0.3718928	total: 11.5s	remaining: 0us


In [None]:
# Display the predictions produced for the test set.
preds

array([0.61674987, 0.74250899, 0.87067048, ..., 0.72768675, 0.50702417,
       0.18332087])

In [None]:
# Build the output table: the Opportunity_ID column from the test set plus the
# predictions stored under "Target".
predict_df = pd.DataFrame(data=test_data["Opportunity_ID"]).assign(Target=preds)

In [None]:
# Preview the first rows of the output table.
predict_df.head()

Unnamed: 0,Opportunity_ID,Target
0,10689,0.61675
1,10690,0.742509
2,10691,0.87067
3,10692,0.634901
4,10693,0.88467


In [None]:
# Write the output table to resultado.csv, omitting the DataFrame index.
predict_df.to_csv("resultado.csv",index=False) 