In [53]:
# Pandas for table processing
import pandas as pd
import lightgbm as lgb

# Packages for auxilary data science tasks: dividing the dataset to train and test and metrics summary generation
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [54]:
data = pd.read_csv("Glass_Type.csv")

In [55]:
data

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,0,2732,1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.00,0.37,'vehic wind float'
1,1,2607,1.51645,14.94,0.00,1.87,73.11,0.00,8.67,1.38,0.00,headlamps
2,2,1653,1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.00,0.00,'build wind float'
3,3,3264,1.51730,12.35,2.72,1.63,72.87,0.70,9.23,0.00,0.00,'build wind non-float'
4,4,4931,1.51673,13.30,3.64,1.53,72.53,0.65,8.03,0.00,0.29,'build wind non-float'
...,...,...,...,...,...,...,...,...,...,...,...,...
192595,192595,6341,1.51613,13.88,1.78,1.79,73.10,0.00,8.67,0.76,0.00,headlamps
192596,192596,1042,1.51915,12.73,1.85,1.86,72.69,0.60,10.09,0.00,0.00,containers
192597,192597,1210,1.52127,14.32,3.90,0.83,71.50,0.00,9.49,0.00,0.00,'vehic wind float'
192598,192598,191,1.51888,14.99,0.78,1.74,72.50,0.00,9.95,0.00,0.00,tableware


In [56]:
data.Type.value_counts()

'build wind non-float'    68251
'build wind float'        62455
headlamps                 26201
'vehic wind float'        15233
containers                12038
tableware                  8422
Name: Type, dtype: int64

In [None]:
# Labels encoding from categories to Integers
data_without_omitted_class['Type'] = LabelEncoder().fit_transform(data_without_omitted_class['Type'])

In [57]:
# get all data of the omitted class "tableware"
omitted_class = data.loc[data['Type'] == 'tableware']

In [58]:
data_without_omitted_class = data.loc[data['Type'] != 'tableware']

In [59]:
data_without_omitted_class

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,0,2732,1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.00,0.37,'vehic wind float'
1,1,2607,1.51645,14.94,0.00,1.87,73.11,0.00,8.67,1.38,0.00,headlamps
2,2,1653,1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.00,0.00,'build wind float'
3,3,3264,1.51730,12.35,2.72,1.63,72.87,0.70,9.23,0.00,0.00,'build wind non-float'
4,4,4931,1.51673,13.30,3.64,1.53,72.53,0.65,8.03,0.00,0.29,'build wind non-float'
...,...,...,...,...,...,...,...,...,...,...,...,...
192594,192594,4629,1.52300,13.31,3.58,0.82,71.99,0.12,10.17,0.00,0.03,'build wind float'
192595,192595,6341,1.51613,13.88,1.78,1.79,73.10,0.00,8.67,0.76,0.00,headlamps
192596,192596,1042,1.51915,12.73,1.85,1.86,72.69,0.60,10.09,0.00,0.00,containers
192597,192597,1210,1.52127,14.32,3.90,0.83,71.50,0.00,9.49,0.00,0.00,'vehic wind float'


In [62]:
data_without_omitted_class

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,0,2732,1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.00,0.37,2
1,1,2607,1.51645,14.94,0.00,1.87,73.11,0.00,8.67,1.38,0.00,4
2,2,1653,1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.00,0.00,0
3,3,3264,1.51730,12.35,2.72,1.63,72.87,0.70,9.23,0.00,0.00,1
4,4,4931,1.51673,13.30,3.64,1.53,72.53,0.65,8.03,0.00,0.29,1
...,...,...,...,...,...,...,...,...,...,...,...,...
192594,192594,4629,1.52300,13.31,3.58,0.82,71.99,0.12,10.17,0.00,0.03,0
192595,192595,6341,1.51613,13.88,1.78,1.79,73.10,0.00,8.67,0.76,0.00,4
192596,192596,1042,1.51915,12.73,1.85,1.86,72.69,0.60,10.09,0.00,0.00,3
192597,192597,1210,1.52127,14.32,3.90,0.83,71.50,0.00,9.49,0.00,0.00,2


In [63]:
X_train, X_test, Y_train, Y_test = train_test_split(data_without_omitted_class.drop(columns=["Type"]), data_without_omitted_class.Type,
                                                    train_size=0.9, stratify=data_without_omitted_class.Type, random_state=42)

In [64]:
#Converting the dataset in proper LGB format
d_train=lgb.Dataset(X_train, label=Y_train)
#setting up the parameters
params={}
params['learning_rate']=0.03
params['boosting_type']='gbdt' #GradientBoostingDecisionTree
params['objective']='multiclass' #Multi-class target feature
params['metric']='multi_logloss' #metric for multi-class
params['max_depth']=10
params['num_class']=6 #no.of unique values in the target class not inclusive of the end value
#training the model
clf=lgb.train(params,d_train,100)  #training the model on 100 epocs
#prediction on the test dataset
y_pred_1=clf.predict(X_test)



LightGBMError: Do not support special JSON characters in feature name.