In [1]:
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
from sklearn.decomposition import FastICA
from sklearn import random_projection
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import homogeneity_score
from collections import Counter
import pandas as pd
import numpy as np
import random
from pytictoc import TicToc
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
t = TicToc()

In [2]:
dfTraining = pd.read_csv('trainingData_treated.csv')
dfTest = pd.read_csv('testData_treated.csv')

In [3]:
# Preparing the dataset for clustering and later for the neural network

dfTest = dfTest.groupby('adjusted_gross_income', group_keys=False).apply(lambda x: x.sample(2000, random_state=1))
dfTraining = dfTraining.groupby('adjusted_gross_income', group_keys=False).apply(lambda x: x.sample(8000, random_state=1))
dfTrainingX = pd.concat((dfTraining["capital_gains"]/max(dfTraining["capital_gains"]),
    dfTraining["divdends_from_stocks"]/max(dfTraining["divdends_from_stocks"]),                    
    pd.get_dummies(dfTraining["education"]),
    pd.get_dummies(dfTraining["class_of_worker"]),
    pd.get_dummies(dfTraining["race"]),
    pd.get_dummies(dfTraining["sex"])), axis=1)

dfTrainingY = pd.get_dummies(dfTraining["adjusted_gross_income"])
dfTrainingY = dfTrainingY.drop([" - 50000."],axis=1)
dfTrainingY = dfTrainingY.rename(columns={" 50000+.": "Income"})
dfTestX = pd.concat((dfTest["capital_gains"]/max(dfTest["capital_gains"]),
    dfTest["divdends_from_stocks"]/max(dfTest["divdends_from_stocks"]),                    
    pd.get_dummies(dfTest["education"]),
    pd.get_dummies(dfTest["class_of_worker"]),
    pd.get_dummies(dfTest["race"]),
    pd.get_dummies(dfTest["sex"])), axis=1)

dfTestY = pd.get_dummies(dfTest["adjusted_gross_income"])
dfTestY = dfTestY.drop([" - 50000."],axis=1)
dfTestY = dfTestY.rename(columns={" 50000+.": "Income"})
# Initialize Neural Network
nnBP = MLPClassifier(solver='lbfgs', alpha=1e-5,
                     hidden_layer_sizes=(2, 2), random_state=1,max_iter=10000,activation='relu')
t.tic()
nnBP.fit(dfTrainingX, dfTrainingY.values.ravel())
taux = t.tocvalue()
dfTestX[" Without pay"]=0
dfTestX = dfTestX.reindex(columns=dfTrainingX.columns.values.tolist())
dfTestY_Pred = nnBP.predict(dfTestX)
dfTrainingY_Pred = nnBP.predict(dfTrainingX)
print(accuracy_score(dfTrainingY, dfTrainingY_Pred))
print(accuracy_score(dfTestY, dfTestY_Pred))
neuralnetwork_sol = pd.DataFrame([['Base',0,0,accuracy_score(dfTrainingY, dfTrainingY_Pred),accuracy_score(dfTestY, dfTestY_Pred),taux]],columns=["Algorithm","Components","Clusters","AccuracyIn","AccuracyOut","Time"])


0.8293125
0.82775


In [4]:
label_real = dfTrainingY['Income'].tolist()
corpus = dfTrainingX
print(corpus.shape)


(16000, 35)


In [5]:
homogeneity_sol = pd.DataFrame([['Test',0,0,0.000,0.000]],columns=["Algorithm","Components","Clusters","Homogeneity","Time"])
homogeneity_sol

Unnamed: 0,Algorithm,Components,Clusters,Homogeneity,Time
0,Test,0,0,0.0,0.0


In [6]:
corpus = corpus.fillna(0)
corpus[corpus > 1] = 1
#columnas = list(corpus.columns)
#columnas_words = vocabulary["ID"]=

    

In [7]:
t.tic()
pca = PCA(n_components=35, random_state = 19830526)
pca.fit(corpus)
t.toc('PCA Calibration with 35 components')

PCA Calibration with 35 components 0.122725 seconds.


In [8]:
np.cumsum(pca.explained_variance_ratio_)

array([0.23651296, 0.39733528, 0.48549611, 0.55870874, 0.63017314,
       0.69266135, 0.74314716, 0.77315496, 0.79814576, 0.82232179,
       0.84408099, 0.86303625, 0.87962617, 0.89505644, 0.9090803 ,
       0.92188629, 0.93355204, 0.94343579, 0.95320409, 0.96267595,
       0.97050168, 0.97726495, 0.98330776, 0.9874877 , 0.99149861,
       0.99470947, 0.99713988, 0.99826676, 0.99928197, 0.99978577,
       1.        , 1.        , 1.        , 1.        , 1.        ])

In [9]:
sum(pca.explained_variance_ratio_)

1.0000000000000002

In [10]:
t.tic()
corpus_pca = pca.transform(corpus)
t.toc('PCA transform took')

PCA transform took 0.030568 seconds.


In [11]:
# Running PCA First
dfTestXPCA = pca.transform(dfTestX)
dfTrainingXPCA = pca.transform(dfTrainingX)
labels_pca = []
for iteCom in range(1,35):
    t.tic()
    nnBP.fit(dfTrainingXPCA[:,0:iteCom], dfTrainingY.values.ravel())
    taux = t.tocvalue()
    dfTestY_Pred = nnBP.predict(dfTestXPCA[:,0:iteCom])
    dfTrainingY_Pred = nnBP.predict(dfTrainingXPCA[:,0:iteCom])
    print("Neural Netwotk with PCA with "+str(iteCom)+" features took to run " + str(taux))
    new_row = pd.DataFrame([["PCA",iteCom,0,accuracy_score(dfTrainingY, dfTrainingY_Pred),accuracy_score(dfTestY, dfTestY_Pred),taux]],columns=["Algorithm","Components","Clusters","AccuracyIn","AccuracyOut","Time"])
    neuralnetwork_sol = pd.concat([neuralnetwork_sol,new_row])
    for ite in range(1,20):
        t.tic()
        gm_pca = GaussianMixture(n_components=ite, random_state=19830526).fit(corpus_pca[:,0:iteCom])   
        gm_label_pca = gm_pca.predict(corpus_pca[:,0:iteCom])
        labels_pca.append(gm_label_pca)
        taux = t.tocvalue()
        print("PCA with "+str(iteCom)+" features and "+str(ite)+" clusters took to run " + str(taux))
        new_row = pd.DataFrame([["PCA",iteCom,ite,homogeneity_score(label_real,gm_label_pca),taux]],columns=["Algorithm","Components","Clusters","Homogeneity","Time"])
        homogeneity_sol = pd.concat([homogeneity_sol,new_row])
        nnBP.fit(pd.get_dummies(gm_label_pca), dfTrainingY.values.ravel())
        taux = t.tocvalue()
        dfTestY_Pred = nnBP.predict(pd.get_dummies(gm_pca.predict(dfTestXPCA[:,0:iteCom])))
        dfTrainingY_Pred = nnBP.predict(pd.get_dummies(gm_label_pca))
        print("Neural Netwotk with PCA with "+str(iteCom)+" features and "+str(ite)+" clusters took to run " + str(taux))
        new_row = pd.DataFrame([["PCA",iteCom,ite,accuracy_score(dfTrainingY, dfTrainingY_Pred),accuracy_score(dfTestY, dfTestY_Pred),taux]],columns=["Algorithm","Components","Clusters","AccuracyIn","AccuracyOut","Time"])
        neuralnetwork_sol = pd.concat([neuralnetwork_sol,new_row])

Neural Netwotk with PCA with 1 features took to run 0.6421458000000015
PCA with 1 features and 1 clusters took to run 0.3759981000000039
Neural Netwotk with PCA with 1 features and 1 clusters took to run 0.4538606999999999
PCA with 1 features and 2 clusters took to run 0.09499449999999854
Neural Netwotk with PCA with 1 features and 2 clusters took to run 0.16571840000000293
PCA with 1 features and 3 clusters took to run 0.08992310000000003
Neural Netwotk with PCA with 1 features and 3 clusters took to run 0.4522691000000023
PCA with 1 features and 4 clusters took to run 0.23075339999999755
Neural Netwotk with PCA with 1 features and 4 clusters took to run 0.40035070000000417
PCA with 1 features and 5 clusters took to run 0.131284100000002
Neural Netwotk with PCA with 1 features and 5 clusters took to run 0.42524099999999976
PCA with 1 features and 6 clusters took to run 0.18696959999999763
Neural Netwotk with PCA with 1 features and 6 clusters took to run 0.2663133000000002
PCA with 1 

PCA with 3 features and 16 clusters took to run 0.501587100000009
Neural Netwotk with PCA with 3 features and 16 clusters took to run 1.972836700000002
PCA with 3 features and 17 clusters took to run 0.5364586999999972
Neural Netwotk with PCA with 3 features and 17 clusters took to run 1.0433248999999876
PCA with 3 features and 18 clusters took to run 0.41658510000000604
Neural Netwotk with PCA with 3 features and 18 clusters took to run 0.634563
PCA with 3 features and 19 clusters took to run 0.45031649999999956
Neural Netwotk with PCA with 3 features and 19 clusters took to run 0.48973370000000216
Neural Netwotk with PCA with 4 features took to run 3.8417798999999917
PCA with 4 features and 1 clusters took to run 0.030491299999994226
Neural Netwotk with PCA with 4 features and 1 clusters took to run 0.06874460000000227
PCA with 4 features and 2 clusters took to run 0.18765640000000872
Neural Netwotk with PCA with 4 features and 2 clusters took to run 0.22496490000000335
PCA with 4 fe

Neural Netwotk with PCA with 6 features and 11 clusters took to run 0.9862569999999948
PCA with 6 features and 12 clusters took to run 0.7924443999999937
Neural Netwotk with PCA with 6 features and 12 clusters took to run 0.9849349999999788
PCA with 6 features and 13 clusters took to run 1.0345973000000015
Neural Netwotk with PCA with 6 features and 13 clusters took to run 1.1813185999999973
PCA with 6 features and 14 clusters took to run 1.0045419999999865
Neural Netwotk with PCA with 6 features and 14 clusters took to run 1.1883301999999958
PCA with 6 features and 15 clusters took to run 0.9746462999999892
Neural Netwotk with PCA with 6 features and 15 clusters took to run 1.1287109999999814
PCA with 6 features and 16 clusters took to run 0.9468278000000225
Neural Netwotk with PCA with 6 features and 16 clusters took to run 1.3200390000000084
PCA with 6 features and 17 clusters took to run 1.3356282000000022
Neural Netwotk with PCA with 6 features and 17 clusters took to run 1.592299

PCA with 9 features and 7 clusters took to run 0.9147831000000224
Neural Netwotk with PCA with 9 features and 7 clusters took to run 1.0100947000000247
PCA with 9 features and 8 clusters took to run 0.5256034999999883
Neural Netwotk with PCA with 9 features and 8 clusters took to run 0.6033308999999747
PCA with 9 features and 9 clusters took to run 0.5660207000000241
Neural Netwotk with PCA with 9 features and 9 clusters took to run 0.7391977999999995
PCA with 9 features and 10 clusters took to run 0.525470299999995
Neural Netwotk with PCA with 9 features and 10 clusters took to run 0.6019484999999918
PCA with 9 features and 11 clusters took to run 1.673320600000011
Neural Netwotk with PCA with 9 features and 11 clusters took to run 1.8546956000000137
PCA with 9 features and 12 clusters took to run 1.7321507999999994
Neural Netwotk with PCA with 9 features and 12 clusters took to run 2.3225596999999993
PCA with 9 features and 13 clusters took to run 2.0176806000000056
Neural Netwotk wi

PCA with 12 features and 2 clusters took to run 0.3759959000000208
Neural Netwotk with PCA with 12 features and 2 clusters took to run 0.41535559999999805
PCA with 12 features and 3 clusters took to run 0.7869266000000152
Neural Netwotk with PCA with 12 features and 3 clusters took to run 0.8847346000000016
PCA with 12 features and 4 clusters took to run 0.32353069999999207
Neural Netwotk with PCA with 12 features and 4 clusters took to run 0.41251920000001974
PCA with 12 features and 5 clusters took to run 0.5921518999999762
Neural Netwotk with PCA with 12 features and 5 clusters took to run 0.7386154999999803
PCA with 12 features and 6 clusters took to run 1.0181609999999637
Neural Netwotk with PCA with 12 features and 6 clusters took to run 1.053364799999997
PCA with 12 features and 7 clusters took to run 2.3385986000000116
Neural Netwotk with PCA with 12 features and 7 clusters took to run 2.4222637000000304
PCA with 12 features and 8 clusters took to run 2.425220999999965
Neural N

Neural Netwotk with PCA with 14 features and 16 clusters took to run 2.89263440000002
PCA with 14 features and 17 clusters took to run 2.7086870999999633
Neural Netwotk with PCA with 14 features and 17 clusters took to run 2.9744973999999615
PCA with 14 features and 18 clusters took to run 4.935132199999998
Neural Netwotk with PCA with 14 features and 18 clusters took to run 5.246289500000046
PCA with 14 features and 19 clusters took to run 8.035713199999975
Neural Netwotk with PCA with 14 features and 19 clusters took to run 8.077614499999981
Neural Netwotk with PCA with 15 features took to run 0.7656235000000038
PCA with 15 features and 1 clusters took to run 0.09347920000004706
Neural Netwotk with PCA with 15 features and 1 clusters took to run 0.12957940000001145
PCA with 15 features and 2 clusters took to run 0.14831290000000763
Neural Netwotk with PCA with 15 features and 2 clusters took to run 0.18590119999998933
PCA with 15 features and 3 clusters took to run 0.2729391999999961

PCA with 17 features and 12 clusters took to run 2.8682559000000083
Neural Netwotk with PCA with 17 features and 12 clusters took to run 3.0124951000000237
PCA with 17 features and 13 clusters took to run 2.8239494000000605
Neural Netwotk with PCA with 17 features and 13 clusters took to run 2.880223200000046
PCA with 17 features and 14 clusters took to run 1.7654406999999992
Neural Netwotk with PCA with 17 features and 14 clusters took to run 2.043220799999972
PCA with 17 features and 15 clusters took to run 2.9177438000000393
Neural Netwotk with PCA with 17 features and 15 clusters took to run 3.021479900000031
PCA with 17 features and 16 clusters took to run 2.2532743999998956
Neural Netwotk with PCA with 17 features and 16 clusters took to run 3.321649999999977
PCA with 17 features and 17 clusters took to run 2.722285100000022
Neural Netwotk with PCA with 17 features and 17 clusters took to run 3.2588088999999627
PCA with 17 features and 18 clusters took to run 3.060724999999934
Ne

PCA with 20 features and 7 clusters took to run 0.6582297999999582
Neural Netwotk with PCA with 20 features and 7 clusters took to run 0.7317775999999867
PCA with 20 features and 8 clusters took to run 0.9304796000000124
Neural Netwotk with PCA with 20 features and 8 clusters took to run 1.0574485999999297
PCA with 20 features and 9 clusters took to run 1.0845207999999502
Neural Netwotk with PCA with 20 features and 9 clusters took to run 1.4627868000000035
PCA with 20 features and 10 clusters took to run 1.2006071000000702
Neural Netwotk with PCA with 20 features and 10 clusters took to run 1.394218500000079
PCA with 20 features and 11 clusters took to run 1.3382736999999452
Neural Netwotk with PCA with 20 features and 11 clusters took to run 1.453366200000005
PCA with 20 features and 12 clusters took to run 1.4289583999999422
Neural Netwotk with PCA with 20 features and 12 clusters took to run 1.6267014999999674
PCA with 20 features and 13 clusters took to run 2.2800644999999804
Neur

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Neural Netwotk with PCA with 22 features and 18 clusters took to run 10.650682299999971
PCA with 22 features and 19 clusters took to run 32.12681829999997
Neural Netwotk with PCA with 22 features and 19 clusters took to run 32.20153299999993
Neural Netwotk with PCA with 23 features took to run 0.03583770000000186
PCA with 23 features and 1 clusters took to run 0.49370760000010705
Neural Netwotk with PCA with 23 features and 1 clusters took to run 0.5732540000001336
PCA with 23 features and 2 clusters took to run 1.2030124000000342
Neural Netwotk with PCA with 23 features and 2 clusters took to run 1.2560863999999583
PCA with 23 features and 3 clusters took to run 4.321535099999892
Neural Netwotk with PCA with 23 features and 3 clusters took to run 4.4142466999999215
PCA with 23 features and 4 clusters took to run 3.5036417999999685
Neural Netwotk with PCA with 23 features and 4 clusters took to run 3.637658500000043
PCA with 23 features and 5 clusters took to run 10.120075999999926
Neu

Neural Netwotk with PCA with 25 features and 13 clusters took to run 14.645901600000116
PCA with 25 features and 14 clusters took to run 31.974107900000035
Neural Netwotk with PCA with 25 features and 14 clusters took to run 32.248260800000025
PCA with 25 features and 15 clusters took to run 20.29965949999996
Neural Netwotk with PCA with 25 features and 15 clusters took to run 20.457447399999865
PCA with 25 features and 16 clusters took to run 14.736562800000002
Neural Netwotk with PCA with 25 features and 16 clusters took to run 19.50324069999988
PCA with 25 features and 17 clusters took to run 16.28713360000006
Neural Netwotk with PCA with 25 features and 17 clusters took to run 16.857926099999986
PCA with 25 features and 18 clusters took to run 24.11433050000005
Neural Netwotk with PCA with 25 features and 18 clusters took to run 24.850301599999966
PCA with 25 features and 19 clusters took to run 31.638864499999954
Neural Netwotk with PCA with 25 features and 19 clusters took to run

Neural Netwotk with PCA with 28 features and 8 clusters took to run 8.471556599999985
PCA with 28 features and 9 clusters took to run 4.869599099999959
Neural Netwotk with PCA with 28 features and 9 clusters took to run 5.268333900000016
PCA with 28 features and 10 clusters took to run 5.334141799999998
Neural Netwotk with PCA with 28 features and 10 clusters took to run 5.554513199999974
PCA with 28 features and 11 clusters took to run 10.515969400000131
Neural Netwotk with PCA with 28 features and 11 clusters took to run 10.637544400000024
PCA with 28 features and 12 clusters took to run 4.3590827000000445
Neural Netwotk with PCA with 28 features and 12 clusters took to run 4.626302800000076
PCA with 28 features and 13 clusters took to run 13.963362499999903
Neural Netwotk with PCA with 28 features and 13 clusters took to run 14.721796300000051
PCA with 28 features and 14 clusters took to run 17.476814200000035
Neural Netwotk with PCA with 28 features and 14 clusters took to run 17.9

Neural Netwotk with PCA with 31 features and 3 clusters took to run 0.7841636000002836
PCA with 31 features and 4 clusters took to run 0.8608967000000121
Neural Netwotk with PCA with 31 features and 4 clusters took to run 1.0256794000001719
PCA with 31 features and 5 clusters took to run 1.0656094999999368
Neural Netwotk with PCA with 31 features and 5 clusters took to run 1.2668831999999384
PCA with 31 features and 6 clusters took to run 2.806137099999887
Neural Netwotk with PCA with 31 features and 6 clusters took to run 2.8539882999998554
PCA with 31 features and 7 clusters took to run 6.186403899999732
Neural Netwotk with PCA with 31 features and 7 clusters took to run 6.306701699999849
PCA with 31 features and 8 clusters took to run 18.31736950000004
Neural Netwotk with PCA with 31 features and 8 clusters took to run 18.476076899999953
PCA with 31 features and 9 clusters took to run 1.7961138000000574
Neural Netwotk with PCA with 31 features and 9 clusters took to run 1.9728372999

PCA with 33 features and 18 clusters took to run 5.092368000000079
Neural Netwotk with PCA with 33 features and 18 clusters took to run 5.575246900000366
PCA with 33 features and 19 clusters took to run 5.213521500000297
Neural Netwotk with PCA with 33 features and 19 clusters took to run 5.256073800000195
Neural Netwotk with PCA with 34 features took to run 15.176452899999731
PCA with 34 features and 1 clusters took to run 0.1378819999999905
Neural Netwotk with PCA with 34 features and 1 clusters took to run 0.1797281999997722
PCA with 34 features and 2 clusters took to run 0.7045885999996244
Neural Netwotk with PCA with 34 features and 2 clusters took to run 0.747791999999663
PCA with 34 features and 3 clusters took to run 0.5727090000000317
Neural Netwotk with PCA with 34 features and 3 clusters took to run 0.761521900000389
PCA with 34 features and 4 clusters took to run 0.6278059000001122
Neural Netwotk with PCA with 34 features and 4 clusters took to run 0.7950644000002285
PCA wi

In [12]:
print(homogeneity_sol)
print(neuralnetwork_sol)

   Algorithm  Components  Clusters  Homogeneity          Time
0       Test           0         0     0.000000      0.000000
0        PCA           1         1     0.000000      0.375998
0        PCA           1         2     0.185159      0.094994
0        PCA           1         3     0.194616      0.089923
0        PCA           1         4     0.226261      0.230753
..       ...         ...       ...          ...           ...
0        PCA          34        15     0.296856     42.848063
0        PCA          34        16     0.334887     25.573489
0        PCA          34        17     0.326698  21829.331742
0        PCA          34        18     0.314325     79.111243
0        PCA          34        19     0.323948     77.341359

[647 rows x 5 columns]
   Algorithm  Components  Clusters  AccuracyIn  AccuracyOut          Time
0       Base           0         0    0.829313      0.82775     22.738217
0        PCA           1         0    0.750313      0.75175      0.642146
0        P

In [13]:
labels = []
for ite in range(1,35):
    t.tic()
    gm = GaussianMixture(n_components=ite, random_state=19830526).fit(corpus)         
    gm_label = gm.predict(corpus)
    labels.append(gm_label)
    taux = t.tocvalue()
    print("RAW with "+str(ite)+" clusters took to run " + str(taux))
    new_row = pd.DataFrame([["RAW",0,ite,homogeneity_score(label_real, gm_label),taux]],columns=["Algorithm","Components","Clusters","Homogeneity","Time"])
    homogeneity_sol = pd.concat([homogeneity_sol,new_row])
    t.tic()
    nnBP.fit(pd.get_dummies(gm_label), dfTrainingY.values.ravel())
    taux = t.tocvalue()
    dfTestY_Pred = nnBP.predict(pd.get_dummies(gm.predict(dfTestX)))
    dfTrainingY_Pred = nnBP.predict(pd.get_dummies(gm_label))
    print("Neural Netwotk after Clustring with "+str(ite)+" clusters took to run " + str(taux))
    new_row = pd.DataFrame([["RAW",0,ite,accuracy_score(dfTrainingY, dfTrainingY_Pred),accuracy_score(dfTestY, dfTestY_Pred),taux]],columns=["Algorithm","Components","Clusters","AccuracyIn","AccuracyOut","Time"])
    neuralnetwork_sol = pd.concat([neuralnetwork_sol,new_row])

RAW with 1 clusters took to run 1.913876300000993
Neural Netwotk after Clustring with 1 clusters took to run 0.09486720000131754
RAW with 2 clusters took to run 2.818636100000731
Neural Netwotk after Clustring with 2 clusters took to run 0.12067339999703108
RAW with 3 clusters took to run 1.119866799999727
Neural Netwotk after Clustring with 3 clusters took to run 0.4632966999997734
RAW with 4 clusters took to run 1.38957819999996
Neural Netwotk after Clustring with 4 clusters took to run 0.31938589999845135
RAW with 5 clusters took to run 1.7295266999972227
Neural Netwotk after Clustring with 5 clusters took to run 0.3216772999985551
RAW with 6 clusters took to run 15.434610000000248
Neural Netwotk after Clustring with 6 clusters took to run 0.08931280000251718
RAW with 7 clusters took to run 8.13148050000018
Neural Netwotk after Clustring with 7 clusters took to run 0.20527389999915613
RAW with 8 clusters took to run 3.4069182999992336
Neural Netwotk after Clustring with 8 clusters t

In [14]:
#ICA = FastICA(n_components=200,max_iter=1000)
#corpus_ica=ICA.fit_transform(corpus)

In [16]:
labels_ica = []

for iteCom in range(1,35):    
    t.tic()
    ICA = FastICA(n_components=iteCom,max_iter=1000, random_state = 19830526)
    ICA.fit(corpus)        
    corpus_ica=ICA.transform(corpus)        
    t.toc("ICA calibration for "+str(iteCom)+" components took ")
    dfTestXICA = ICA.transform(dfTestX)
    dfTrainingXICA =  ICA.transform(dfTrainingX)
    t.tic()
    nnBP.fit(dfTrainingXICA, dfTrainingY.values.ravel())
    taux = t.tocvalue()
    dfTestY_Pred = nnBP.predict(dfTestXICA)
    dfTrainingY_Pred = nnBP.predict(dfTrainingXICA)
    print("Neural Netwotk with ICA with "+str(iteCom)+" features took to run " + str(taux))
    new_row = pd.DataFrame([["ICA",iteCom,0,accuracy_score(dfTrainingY, dfTrainingY_Pred),accuracy_score(dfTestY, dfTestY_Pred),taux]],columns=["Algorithm","Components","Clusters","AccuracyIn","AccuracyOut","Time"])
    neuralnetwork_sol = pd.concat([neuralnetwork_sol,new_row])
    for ite in range(1,20):
        t.tic()
        gm_ica = GaussianMixture(n_components=ite, random_state=19830526).fit(corpus_ica)   
        gm_label_ica = gm_ica.predict(corpus_ica)
        labels_ica.append(gm_label_ica)
        taux = t.tocvalue()
        print("ICA with "+str(iteCom)+" features and "+str(ite)+" clusters took to run " + str(taux))
        new_row = pd.DataFrame([["ICA",iteCom,ite,homogeneity_score(label_real,gm_label_ica),taux]],columns=["Algorithm","Components","Clusters","Homogeneity","Time"])
        homogeneity_sol = pd.concat([homogeneity_sol,new_row])
        nnBP.fit(pd.get_dummies(gm_label_pca), dfTrainingY.values.ravel())
        taux = t.tocvalue()
        try:
            dfTestY_Pred = nnBP.predict(pd.get_dummies(gm_ica.predict(dfTestXICA)))
            dfTrainingY_Pred = nnBP.predict(pd.get_dummies(gm_label_ica))
            print("Neural Netwotk with ICA with "+str(iteCom)+" features and "+str(ite)+" clusters took to run " + str(taux))
            new_row = pd.DataFrame([["ICA",iteCom,ite,accuracy_score(dfTrainingY, dfTrainingY_Pred),accuracy_score(dfTestY, dfTestY_Pred),taux]],columns=["Algorithm","Components","Clusters","AccuracyIn","AccuracyOut","Time"])
            neuralnetwork_sol = pd.concat([neuralnetwork_sol,new_row])
        except:
            new_row = pd.DataFrame([["ICA",iteCom,ite,0,0,0]],columns=["Algorithm","Components","Clusters","AccuracyIn","AccuracyOut","Time"])
            neuralnetwork_sol = pd.concat([neuralnetwork_sol,new_row])                



ICA calibration for 1 components took  1.043711 seconds.
Neural Netwotk with ICA with 1 features took to run 0.4042245000018738
ICA with 1 features and 1 clusters took to run 0.5794400999984646
ICA with 1 features and 2 clusters took to run 0.49810160000197357
ICA with 1 features and 3 clusters took to run 0.9940344999995432
ICA with 1 features and 4 clusters took to run 0.26633850000143866
ICA with 1 features and 5 clusters took to run 0.37350579999838374
ICA with 1 features and 6 clusters took to run 0.5167631000003894
ICA with 1 features and 7 clusters took to run 0.7287292999972124
ICA with 1 features and 8 clusters took to run 0.8374455000011949
ICA with 1 features and 9 clusters took to run 0.7162453999990248
ICA with 1 features and 10 clusters took to run 0.8163901000007172
ICA with 1 features and 11 clusters took to run 0.9852534999990894
ICA with 1 features and 12 clusters took to run 0.9378881000011461
ICA with 1 features and 13 clusters took to run 0.9211180999991484
ICA wit



ICA calibration for 2 components took  0.442089 seconds.
Neural Netwotk with ICA with 2 features took to run 0.0934464999991178
ICA with 2 features and 1 clusters took to run 0.08513070000117295
ICA with 2 features and 2 clusters took to run 0.23593700000128592
ICA with 2 features and 3 clusters took to run 0.6194868000020506
ICA with 2 features and 4 clusters took to run 0.3294967000001634
ICA with 2 features and 5 clusters took to run 0.34959410000010394
ICA with 2 features and 6 clusters took to run 0.281566899997415
ICA with 2 features and 7 clusters took to run 0.33108310000170604
ICA with 2 features and 8 clusters took to run 0.3697475000008126
ICA with 2 features and 9 clusters took to run 0.42702160000044387
ICA with 2 features and 10 clusters took to run 0.45363069999802974
ICA with 2 features and 11 clusters took to run 0.4205837000008614
ICA with 2 features and 12 clusters took to run 0.5865613000023586
ICA with 2 features and 13 clusters took to run 0.5699611999989429
ICA w



ICA calibration for 3 components took  0.341097 seconds.
Neural Netwotk with ICA with 3 features took to run 0.11325139999826206
ICA with 3 features and 1 clusters took to run 0.1383966000030341
ICA with 3 features and 2 clusters took to run 0.3556334000022616
ICA with 3 features and 3 clusters took to run 0.3763582999999926
ICA with 3 features and 4 clusters took to run 0.35018830000262824
ICA with 3 features and 5 clusters took to run 0.5711577000001853
ICA with 3 features and 6 clusters took to run 0.5098522000007506
ICA with 3 features and 7 clusters took to run 0.8240695999993477
ICA with 3 features and 8 clusters took to run 1.4142881000007037
ICA with 3 features and 9 clusters took to run 1.4349524999997811
ICA with 3 features and 10 clusters took to run 1.3169683999985864
ICA with 3 features and 11 clusters took to run 1.2079961999988882
ICA with 3 features and 12 clusters took to run 1.5596196000005875
ICA with 3 features and 13 clusters took to run 0.9448951999984274
ICA with



ICA calibration for 4 components took  0.270560 seconds.
Neural Netwotk with ICA with 4 features took to run 1.4425033000006806
ICA with 4 features and 1 clusters took to run 0.04168760000175098
ICA with 4 features and 2 clusters took to run 0.49750849999691127
ICA with 4 features and 3 clusters took to run 0.7529575000007753
ICA with 4 features and 4 clusters took to run 0.6174473999999464
ICA with 4 features and 5 clusters took to run 1.449514600000839
ICA with 4 features and 6 clusters took to run 1.2261489000011352
ICA with 4 features and 7 clusters took to run 1.20231579999745
ICA with 4 features and 8 clusters took to run 2.1248072999987926
ICA with 4 features and 9 clusters took to run 1.1372150999995938
ICA with 4 features and 10 clusters took to run 1.3443401999975322
ICA with 4 features and 11 clusters took to run 0.6235034000019368
ICA with 4 features and 12 clusters took to run 1.3899184000001696
ICA with 4 features and 13 clusters took to run 1.3880169000003661
ICA with 4 



ICA calibration for 5 components took  0.730808 seconds.
Neural Netwotk with ICA with 5 features took to run 1.4337401000011596
ICA with 5 features and 1 clusters took to run 0.1290889000010793
ICA with 5 features and 2 clusters took to run 0.88559560000067
ICA with 5 features and 3 clusters took to run 1.8635211000000709
ICA with 5 features and 4 clusters took to run 1.7611800999984553
ICA with 5 features and 5 clusters took to run 1.9311544000011054
ICA with 5 features and 6 clusters took to run 2.1563219999989087
ICA with 5 features and 7 clusters took to run 2.0061721999991278
ICA with 5 features and 8 clusters took to run 2.8660018000009586
ICA with 5 features and 9 clusters took to run 2.5112125000014203
ICA with 5 features and 10 clusters took to run 2.7567094000005454
ICA with 5 features and 11 clusters took to run 13.072966900002939
ICA with 5 features and 12 clusters took to run 2.096558900000673
ICA with 5 features and 13 clusters took to run 2.06670959999974
ICA with 5 feat



ICA calibration for 6 components took  0.476796 seconds.
Neural Netwotk with ICA with 6 features took to run 0.11315889999968931
ICA with 6 features and 1 clusters took to run 0.13741809999919496
ICA with 6 features and 2 clusters took to run 0.9474073999990651
ICA with 6 features and 3 clusters took to run 0.3729877000005217
ICA with 6 features and 4 clusters took to run 0.609179299997777
ICA with 6 features and 5 clusters took to run 13.794851599999674
ICA with 6 features and 6 clusters took to run 8.452793800002837
ICA with 6 features and 7 clusters took to run 9.337591700001212
ICA with 6 features and 8 clusters took to run 1.8490549000016472
ICA with 6 features and 9 clusters took to run 1.6944989999974496
ICA with 6 features and 10 clusters took to run 18.008267199998954
ICA with 6 features and 11 clusters took to run 13.563565299999027
ICA with 6 features and 12 clusters took to run 1.928464000000531
ICA with 6 features and 13 clusters took to run 2.093418799999199
ICA with 6 fe



ICA calibration for 7 components took  1.931995 seconds.
Neural Netwotk with ICA with 7 features took to run 2.7379424999999173
ICA with 7 features and 1 clusters took to run 3.37365839999984
ICA with 7 features and 2 clusters took to run 4.9155226000002585
ICA with 7 features and 3 clusters took to run 12.576457700000901
ICA with 7 features and 4 clusters took to run 5.969904899997346
ICA with 7 features and 5 clusters took to run 2.229594000000361
ICA with 7 features and 6 clusters took to run 0.9884342000004835
ICA with 7 features and 7 clusters took to run 1.212906100001419
ICA with 7 features and 8 clusters took to run 1.136121000003186
ICA with 7 features and 9 clusters took to run 2.1318936999996367
ICA with 7 features and 10 clusters took to run 29.118656099999498
ICA with 7 features and 11 clusters took to run 1.4045793999976013
ICA with 7 features and 12 clusters took to run 1.677508900000248
ICA with 7 features and 13 clusters took to run 1.814277100002073
ICA with 7 feature



ICA calibration for 8 components took  21.956320 seconds.
Neural Netwotk with ICA with 8 features took to run 0.6587626000000455
ICA with 8 features and 1 clusters took to run 0.03794279999783612
ICA with 8 features and 2 clusters took to run 0.058010500000818865
ICA with 8 features and 3 clusters took to run 0.23576329999923473
ICA with 8 features and 4 clusters took to run 3.808953700001439
ICA with 8 features and 5 clusters took to run 3.0575936999994155
ICA with 8 features and 6 clusters took to run 0.1740100999995775
ICA with 8 features and 7 clusters took to run 0.35446850000153063
ICA with 8 features and 8 clusters took to run 0.33337639999808744
ICA with 8 features and 9 clusters took to run 0.29294259999733185
ICA with 8 features and 10 clusters took to run 0.675212800000736
ICA with 8 features and 11 clusters took to run 0.49975070000073174
ICA with 8 features and 12 clusters took to run 0.7424443000018073
ICA with 8 features and 13 clusters took to run 0.8974276000008103
ICA



ICA calibration for 9 components took  5.677330 seconds.
Neural Netwotk with ICA with 9 features took to run 0.0771677999982785
ICA with 9 features and 1 clusters took to run 0.03617459999804851
ICA with 9 features and 2 clusters took to run 0.15628120000110357
ICA with 9 features and 3 clusters took to run 0.107022500000312
ICA with 9 features and 4 clusters took to run 6.335086700000829
ICA with 9 features and 5 clusters took to run 0.5947170999970695
ICA with 9 features and 6 clusters took to run 0.7709833000008075
ICA with 9 features and 7 clusters took to run 1.5775164999977278
ICA with 9 features and 8 clusters took to run 3.0052813999973296
ICA with 9 features and 9 clusters took to run 1.3843105000014475
ICA with 9 features and 10 clusters took to run 1.383368499999051
ICA with 9 features and 11 clusters took to run 1.5188314000006358
ICA with 9 features and 12 clusters took to run 1.095121400001517
ICA with 9 features and 13 clusters took to run 1.0948914999971748
ICA with 9 f



ICA calibration for 10 components took  8.190637 seconds.
Neural Netwotk with ICA with 10 features took to run 0.029897000000346452
ICA with 10 features and 1 clusters took to run 0.05805549999786308
ICA with 10 features and 2 clusters took to run 0.14789659999951255
ICA with 10 features and 3 clusters took to run 0.3518632000013895
ICA with 10 features and 4 clusters took to run 0.5800583999989612
ICA with 10 features and 5 clusters took to run 0.42092159999810974
ICA with 10 features and 6 clusters took to run 0.4519404000020586
ICA with 10 features and 7 clusters took to run 0.8324350000002596
ICA with 10 features and 8 clusters took to run 0.5669613999998546
ICA with 10 features and 9 clusters took to run 0.3113401000009617
ICA with 10 features and 10 clusters took to run 0.36559500000294065
ICA with 10 features and 11 clusters took to run 0.4543478000014147
ICA with 10 features and 12 clusters took to run 1.9784141999989515
ICA with 10 features and 13 clusters took to run 5.950457



ICA calibration for 11 components took  0.467333 seconds.
Neural Netwotk with ICA with 11 features took to run 1.892413100002159
ICA with 11 features and 1 clusters took to run 4.254436500003067
ICA with 11 features and 2 clusters took to run 7.302795799998421
ICA with 11 features and 3 clusters took to run 3.426809600001434
ICA with 11 features and 4 clusters took to run 1.2067837000031432
ICA with 11 features and 5 clusters took to run 1.1136683000004268
ICA with 11 features and 6 clusters took to run 0.410261699998955
ICA with 11 features and 7 clusters took to run 14.732242099998984
ICA with 11 features and 8 clusters took to run 0.41145999999935157
ICA with 11 features and 9 clusters took to run 0.570077800002764
ICA with 11 features and 10 clusters took to run 0.6635581000009552
ICA with 11 features and 11 clusters took to run 15.439283200001228
ICA with 11 features and 12 clusters took to run 0.960576300003595
ICA with 11 features and 13 clusters took to run 0.7537163000015425
I



ICA calibration for 12 components took  0.638578 seconds.
Neural Netwotk with ICA with 12 features took to run 0.02622229999906267
ICA with 12 features and 1 clusters took to run 0.06368080000174814
ICA with 12 features and 2 clusters took to run 0.19625380000070436
ICA with 12 features and 3 clusters took to run 0.25799370000095223
ICA with 12 features and 4 clusters took to run 1.3684164000005694
ICA with 12 features and 5 clusters took to run 2.8556654999993043
ICA with 12 features and 6 clusters took to run 0.49095399999714573
ICA with 12 features and 7 clusters took to run 0.47898220000206493
ICA with 12 features and 8 clusters took to run 8.260840899998584
ICA with 12 features and 9 clusters took to run 7.062431399997877
ICA with 12 features and 10 clusters took to run 0.9237737999974343
ICA with 12 features and 11 clusters took to run 16.17645849999826
ICA with 12 features and 12 clusters took to run 1.6152922000001126
ICA with 12 features and 13 clusters took to run 15.67449979



ICA calibration for 13 components took  0.188795 seconds.
Neural Netwotk with ICA with 13 features took to run 0.02369150000231457
ICA with 13 features and 1 clusters took to run 0.041270199999416946
ICA with 13 features and 2 clusters took to run 0.0842677000000549
ICA with 13 features and 3 clusters took to run 0.11787960000219755
ICA with 13 features and 4 clusters took to run 0.1519317999991472
ICA with 13 features and 5 clusters took to run 0.5134546000008413
ICA with 13 features and 6 clusters took to run 6.097987100001774
ICA with 13 features and 7 clusters took to run 0.785553699999582
ICA with 13 features and 8 clusters took to run 0.3068553000011889
ICA with 13 features and 9 clusters took to run 0.34910300000046846
ICA with 13 features and 10 clusters took to run 1.6104359999990265
ICA with 13 features and 11 clusters took to run 0.9336963000023388
ICA with 13 features and 12 clusters took to run 0.6720449999993434
ICA with 13 features and 13 clusters took to run 0.638095399



ICA calibration for 14 components took  90.977296 seconds.
Neural Netwotk with ICA with 14 features took to run 2.291110199999821
ICA with 14 features and 1 clusters took to run 9.552274300000136
ICA with 14 features and 2 clusters took to run 2.8925357999978587
ICA with 14 features and 3 clusters took to run 12.629575800001476
ICA with 14 features and 4 clusters took to run 23.751982099998713
ICA with 14 features and 5 clusters took to run 36.33041629999934
ICA with 14 features and 6 clusters took to run 2.0613491000003705
ICA with 14 features and 7 clusters took to run 1.6772911999978533
ICA with 14 features and 8 clusters took to run 4.113782399999764
ICA with 14 features and 9 clusters took to run 31.986372599996685
ICA with 14 features and 10 clusters took to run 4.175907100001496
ICA with 14 features and 11 clusters took to run 32.54636300000129
ICA with 14 features and 12 clusters took to run 34.899763299999904
ICA with 14 features and 13 clusters took to run 64.93896210000094
I



ICA calibration for 15 components took  31.006289 seconds.
Neural Netwotk with ICA with 15 features took to run 0.07035950000135927
ICA with 15 features and 1 clusters took to run 0.060574900002393406
ICA with 15 features and 2 clusters took to run 0.9843909000010171
ICA with 15 features and 3 clusters took to run 0.9386653000001388
ICA with 15 features and 4 clusters took to run 0.7868381999978737
ICA with 15 features and 5 clusters took to run 0.6170824000000721
ICA with 15 features and 6 clusters took to run 1.0497609000012744
ICA with 15 features and 7 clusters took to run 1.6400768000021344
ICA with 15 features and 8 clusters took to run 7.7177883999975165
ICA with 15 features and 9 clusters took to run 0.6617721000002348
ICA with 15 features and 10 clusters took to run 0.8090202999992471
ICA with 15 features and 11 clusters took to run 3.0427584999997634
ICA with 15 features and 12 clusters took to run 0.9535031000013987
ICA with 15 features and 13 clusters took to run 1.00336869



ICA calibration for 16 components took  62.856095 seconds.
Neural Netwotk with ICA with 16 features took to run 8.362528100002237
ICA with 16 features and 1 clusters took to run 6.059403699997347
ICA with 16 features and 2 clusters took to run 2.4292872000005445
ICA with 16 features and 3 clusters took to run 0.2841048000009323
ICA with 16 features and 4 clusters took to run 1.0045821000021533
ICA with 16 features and 5 clusters took to run 0.686692100000073
ICA with 16 features and 6 clusters took to run 14.356864099998347
ICA with 16 features and 7 clusters took to run 0.7000898000005691
ICA with 16 features and 8 clusters took to run 0.8914139000007708
ICA with 16 features and 9 clusters took to run 0.3829369000013685
ICA with 16 features and 10 clusters took to run 0.8369416000023193
ICA with 16 features and 11 clusters took to run 15.877459800001816
ICA with 16 features and 12 clusters took to run 0.6764408999988518
ICA with 16 features and 13 clusters took to run 0.89867740000045



ICA calibration for 17 components took  19.889279 seconds.
Neural Netwotk with ICA with 17 features took to run 1.3026033000023745
ICA with 17 features and 1 clusters took to run 8.873511199999484
ICA with 17 features and 2 clusters took to run 7.394769800001086
ICA with 17 features and 3 clusters took to run 0.24387570000180858
ICA with 17 features and 4 clusters took to run 1.3295785000009346
ICA with 17 features and 5 clusters took to run 0.8752732000029937
ICA with 17 features and 6 clusters took to run 18.128060899998673
ICA with 17 features and 7 clusters took to run 1.3919724000006681
ICA with 17 features and 8 clusters took to run 1.2063412999996217
ICA with 17 features and 9 clusters took to run 15.337493400002131
ICA with 17 features and 10 clusters took to run 1.1908957999985432
ICA with 17 features and 11 clusters took to run 1.1139274000015575
ICA with 17 features and 12 clusters took to run 2.7088045999989845
ICA with 17 features and 13 clusters took to run 1.978559099999



ICA calibration for 18 components took  8.947932 seconds.
Neural Netwotk with ICA with 18 features took to run 1.1207022000016877
ICA with 18 features and 1 clusters took to run 0.061751599998387974
ICA with 18 features and 2 clusters took to run 0.48706570000285865
ICA with 18 features and 3 clusters took to run 2.2053863000001
ICA with 18 features and 4 clusters took to run 1.3803512000013143
ICA with 18 features and 5 clusters took to run 0.7583099000003131
ICA with 18 features and 6 clusters took to run 0.27076369999849703
ICA with 18 features and 7 clusters took to run 5.543117400000483
ICA with 18 features and 8 clusters took to run 1.9082774000016798
ICA with 18 features and 9 clusters took to run 0.6601942999986932
ICA with 18 features and 10 clusters took to run 0.8437532000025385
ICA with 18 features and 11 clusters took to run 2.988839100002224
ICA with 18 features and 12 clusters took to run 0.9167584000024362
ICA with 18 features and 13 clusters took to run 1.2739777999995



ICA calibration for 19 components took  1.541377 seconds.
Neural Netwotk with ICA with 19 features took to run 0.024820899998303503
ICA with 19 features and 1 clusters took to run 0.05503049999970244
ICA with 19 features and 2 clusters took to run 0.11963560000003781
ICA with 19 features and 3 clusters took to run 0.6857491999980994
ICA with 19 features and 4 clusters took to run 0.4911418000010599
ICA with 19 features and 5 clusters took to run 0.469162100001995
ICA with 19 features and 6 clusters took to run 14.546301800000947
ICA with 19 features and 7 clusters took to run 37.912466899997526
ICA with 19 features and 8 clusters took to run 4.313494300000457
ICA with 19 features and 9 clusters took to run 3.613521799998125
ICA with 19 features and 10 clusters took to run 37.80012270000225
ICA with 19 features and 11 clusters took to run 11.643654700001207
ICA with 19 features and 12 clusters took to run 26.894478099999105
ICA with 19 features and 13 clusters took to run 37.78454680000



ICA calibration for 20 components took  4.972187 seconds.
Neural Netwotk with ICA with 20 features took to run 0.07005950000166195
ICA with 20 features and 1 clusters took to run 4.186285299998417
ICA with 20 features and 2 clusters took to run 13.630581299999903
ICA with 20 features and 3 clusters took to run 13.621338200002356
ICA with 20 features and 4 clusters took to run 2.6817786999999953
ICA with 20 features and 5 clusters took to run 1.0989544000003661
ICA with 20 features and 6 clusters took to run 28.56493689999843
ICA with 20 features and 7 clusters took to run 1.4025103000021772
ICA with 20 features and 8 clusters took to run 1.5694741000006616
ICA with 20 features and 9 clusters took to run 1.700344099997892
ICA with 20 features and 10 clusters took to run 1.8459330000005139
ICA with 20 features and 11 clusters took to run 4.0171899999986636
ICA with 20 features and 12 clusters took to run 2.00902589999896
ICA with 20 features and 13 clusters took to run 2.1036918999998306



ICA calibration for 21 components took  1.781776 seconds.
Neural Netwotk with ICA with 21 features took to run 0.10476199999902747
ICA with 21 features and 1 clusters took to run 0.06915640000079293
ICA with 21 features and 2 clusters took to run 0.5279363000008743
ICA with 21 features and 3 clusters took to run 0.774265000000014
ICA with 21 features and 4 clusters took to run 0.6729541999993671
ICA with 21 features and 5 clusters took to run 8.698925100001361
ICA with 21 features and 6 clusters took to run 1.005724899998313
ICA with 21 features and 7 clusters took to run 1.0017678000003798
ICA with 21 features and 8 clusters took to run 0.5453297000021848
ICA with 21 features and 9 clusters took to run 0.4213516999989224
ICA with 21 features and 10 clusters took to run 0.5726183999977366
ICA with 21 features and 11 clusters took to run 5.925312600000325
ICA with 21 features and 12 clusters took to run 4.0336438000013
ICA with 21 features and 13 clusters took to run 8.408412200002203
I



ICA calibration for 22 components took  3.856269 seconds.
Neural Netwotk with ICA with 22 features took to run 0.06143520000114222
ICA with 22 features and 1 clusters took to run 0.17418029999680584
ICA with 22 features and 2 clusters took to run 0.6824209000005794
ICA with 22 features and 3 clusters took to run 0.8741641999986314
ICA with 22 features and 4 clusters took to run 1.6897690999976476
ICA with 22 features and 5 clusters took to run 8.306533099999797
ICA with 22 features and 6 clusters took to run 0.6838457000012568
ICA with 22 features and 7 clusters took to run 2.254455800000869
ICA with 22 features and 8 clusters took to run 1.8315963999993983
ICA with 22 features and 9 clusters took to run 1.0721005000013974
ICA with 22 features and 10 clusters took to run 1.0656229000014719
ICA with 22 features and 11 clusters took to run 0.6260623000016494
ICA with 22 features and 12 clusters took to run 8.590782600000239
ICA with 22 features and 13 clusters took to run 17.322403199999



ICA calibration for 23 components took  16.851737 seconds.
Neural Netwotk with ICA with 23 features took to run 0.031347899999673245
ICA with 23 features and 1 clusters took to run 0.18649880000157282
ICA with 23 features and 2 clusters took to run 0.9675272999993467
ICA with 23 features and 3 clusters took to run 16.186071600001014
ICA with 23 features and 4 clusters took to run 1.9722531999977946
ICA with 23 features and 5 clusters took to run 34.420731300000625
ICA with 23 features and 6 clusters took to run 17.85137079999913
ICA with 23 features and 7 clusters took to run 3.053936199998134
ICA with 23 features and 8 clusters took to run 17.628870700002153
ICA with 23 features and 9 clusters took to run 1.1202157000006991
ICA with 23 features and 10 clusters took to run 1.0590241999998398
ICA with 23 features and 11 clusters took to run 15.977443100000528
ICA with 23 features and 12 clusters took to run 1.5077289000000746
ICA with 23 features and 13 clusters took to run 0.9207349000



ICA calibration for 24 components took  0.542886 seconds.
Neural Netwotk with ICA with 24 features took to run 0.022291799999948125
ICA with 24 features and 1 clusters took to run 0.08211719999962952
ICA with 24 features and 2 clusters took to run 0.2565065000017057
ICA with 24 features and 3 clusters took to run 6.508147000000463
ICA with 24 features and 4 clusters took to run 0.8386121000003186
ICA with 24 features and 5 clusters took to run 1.7100642999976117
ICA with 24 features and 6 clusters took to run 4.293911700002354
ICA with 24 features and 7 clusters took to run 1.0566911000023538
ICA with 24 features and 8 clusters took to run 0.8126453000004403
ICA with 24 features and 9 clusters took to run 8.015891600000032
ICA with 24 features and 10 clusters took to run 1.121509700002207
ICA with 24 features and 11 clusters took to run 3.0787610999977915
ICA with 24 features and 12 clusters took to run 1.7588309999991907
ICA with 24 features and 13 clusters took to run 1.3569858000009



ICA calibration for 25 components took  1.528025 seconds.
Neural Netwotk with ICA with 25 features took to run 0.05214020000130404
ICA with 25 features and 1 clusters took to run 0.8454640999989351
ICA with 25 features and 2 clusters took to run 9.420027500000288
ICA with 25 features and 3 clusters took to run 0.2173356999992393
ICA with 25 features and 4 clusters took to run 0.24676090000139084
ICA with 25 features and 5 clusters took to run 0.344149100001232
ICA with 25 features and 6 clusters took to run 2.7376269000014872
ICA with 25 features and 7 clusters took to run 2.840267000003223
ICA with 25 features and 8 clusters took to run 1.6035109999975248
ICA with 25 features and 9 clusters took to run 3.903963799999474
ICA with 25 features and 10 clusters took to run 4.119253500000923
ICA with 25 features and 11 clusters took to run 5.171933399997215
ICA with 25 features and 12 clusters took to run 3.0798996000012266
ICA with 25 features and 13 clusters took to run 8.73059959999955
I



ICA calibration for 26 components took  42.875316 seconds.
Neural Netwotk with ICA with 26 features took to run 0.08580919999803882
ICA with 26 features and 1 clusters took to run 0.4597289000012097
ICA with 26 features and 2 clusters took to run 1.360624100001587
ICA with 26 features and 3 clusters took to run 40.16671739999947
ICA with 26 features and 4 clusters took to run 42.02745660000073
ICA with 26 features and 5 clusters took to run 6.402860599999258
ICA with 26 features and 6 clusters took to run 50.934598100000585
ICA with 26 features and 7 clusters took to run 25.17521839999972
ICA with 26 features and 8 clusters took to run 3.2203269000019645
ICA with 26 features and 9 clusters took to run 3.4114769000007072
ICA with 26 features and 10 clusters took to run 3.8136360999997123
ICA with 26 features and 11 clusters took to run 4.040134099999705
ICA with 26 features and 12 clusters took to run 13.253512500003126
ICA with 26 features and 13 clusters took to run 14.447639300000446



ICA calibration for 27 components took  2.099010 seconds.
Neural Netwotk with ICA with 27 features took to run 0.07861849999972037
ICA with 27 features and 1 clusters took to run 1.0537867999992159
ICA with 27 features and 2 clusters took to run 23.286364700001286
ICA with 27 features and 3 clusters took to run 8.756264700001338
ICA with 27 features and 4 clusters took to run 3.229220400000486
ICA with 27 features and 5 clusters took to run 4.095336400001543
ICA with 27 features and 6 clusters took to run 2.742889400000422
ICA with 27 features and 7 clusters took to run 3.669064800000342
ICA with 27 features and 8 clusters took to run 1.120689700001094
ICA with 27 features and 9 clusters took to run 1.160353600000235
ICA with 27 features and 10 clusters took to run 0.704170900000463
ICA with 27 features and 11 clusters took to run 0.7723134000007121
ICA with 27 features and 12 clusters took to run 0.8780488999982481
ICA with 27 features and 13 clusters took to run 17.52656070000012
ICA



ICA calibration for 28 components took  1.178377 seconds.
Neural Netwotk with ICA with 28 features took to run 2.1016285000005155
ICA with 28 features and 1 clusters took to run 0.07816390000152751
ICA with 28 features and 2 clusters took to run 0.16013879999809433
ICA with 28 features and 3 clusters took to run 0.5529333000013139
ICA with 28 features and 4 clusters took to run 1.895812200000364
ICA with 28 features and 5 clusters took to run 1.072383599999739
ICA with 28 features and 6 clusters took to run 0.4792684999993071
ICA with 28 features and 7 clusters took to run 2.627947299999505
ICA with 28 features and 8 clusters took to run 0.6229745999989973
ICA with 28 features and 9 clusters took to run 0.6158752999981516
ICA with 28 features and 10 clusters took to run 0.7030444999982137
ICA with 28 features and 11 clusters took to run 0.8434798000016599
ICA with 28 features and 12 clusters took to run 0.8992471000019577
ICA with 28 features and 13 clusters took to run 1.2917269000026



ICA calibration for 29 components took  1.232271 seconds.
Neural Netwotk with ICA with 29 features took to run 0.03279190000102972
ICA with 29 features and 1 clusters took to run 0.1588951999983692
ICA with 29 features and 2 clusters took to run 0.3064148999983445
ICA with 29 features and 3 clusters took to run 0.4524654999986524
ICA with 29 features and 4 clusters took to run 0.5222002000009525
ICA with 29 features and 5 clusters took to run 6.165967900000396
ICA with 29 features and 6 clusters took to run 7.095165100003214
ICA with 29 features and 7 clusters took to run 7.718444500002079
ICA with 29 features and 8 clusters took to run 63.167872900001385
ICA with 29 features and 9 clusters took to run 57.925546999998915
ICA with 29 features and 10 clusters took to run 50.85875329999908
ICA with 29 features and 11 clusters took to run 9.129021699998702
ICA with 29 features and 12 clusters took to run 11.397298899999441
ICA with 29 features and 13 clusters took to run 12.942341400001169



ICA calibration for 30 components took  21.045220 seconds.
Neural Netwotk with ICA with 30 features took to run 0.09464630000002217
ICA with 30 features and 1 clusters took to run 0.7363081999974384
ICA with 30 features and 2 clusters took to run 1.472040399999969
ICA with 30 features and 3 clusters took to run 2.028749400000379
ICA with 30 features and 4 clusters took to run 6.8805314999990514
ICA with 30 features and 5 clusters took to run 10.844164599999203
ICA with 30 features and 6 clusters took to run 11.730604700002004
ICA with 30 features and 7 clusters took to run 20.89136040000085
ICA with 30 features and 8 clusters took to run 21.194491999998718
ICA with 30 features and 9 clusters took to run 3.820641100002831
ICA with 30 features and 10 clusters took to run 2.9033681000000797
ICA with 30 features and 11 clusters took to run 3.6603019000031054
ICA with 30 features and 12 clusters took to run 2.220557700002246
ICA with 30 features and 13 clusters took to run 1.884245799999917



ICA calibration for 31 components took  14.700415 seconds.
Neural Netwotk with ICA with 31 features took to run 35.75415859999703
ICA with 31 features and 1 clusters took to run 1.155384799996682
ICA with 31 features and 2 clusters took to run 2.649264600000606
ICA with 31 features and 3 clusters took to run 3.274224499997217
ICA with 31 features and 4 clusters took to run 4.472139299999981
ICA with 31 features and 5 clusters took to run 5.114061500000389
ICA with 31 features and 6 clusters took to run 7.452080900002329
ICA with 31 features and 7 clusters took to run 13.960902999999234
ICA with 31 features and 8 clusters took to run 11.42237390000082
ICA with 31 features and 9 clusters took to run 8.491568699999334
ICA with 31 features and 10 clusters took to run 25.585859700000583
ICA with 31 features and 11 clusters took to run 36.369021200000134
ICA with 31 features and 12 clusters took to run 35.91952219999803
ICA with 31 features and 13 clusters took to run 43.40310970000064
ICA w



ICA calibration for 32 components took  3.688324 seconds.
Neural Netwotk with ICA with 32 features took to run 0.13622079999913694
ICA with 32 features and 1 clusters took to run 0.5095490000021528
ICA with 32 features and 2 clusters took to run 0.9979920999976457
ICA with 32 features and 3 clusters took to run 1.4749604999997246
ICA with 32 features and 4 clusters took to run 2.3078564000024926
ICA with 32 features and 5 clusters took to run 5.812361000000237
ICA with 32 features and 6 clusters took to run 7.261095199999545
ICA with 32 features and 7 clusters took to run 9.361636400000862
ICA with 32 features and 8 clusters took to run 10.947782200000802
ICA with 32 features and 9 clusters took to run 9.433569900000293
ICA with 32 features and 10 clusters took to run 8.530764899998758
ICA with 32 features and 11 clusters took to run 11.264779799999815
ICA with 32 features and 12 clusters took to run 14.000158300001203
ICA with 32 features and 13 clusters took to run 12.337105900001916



ICA calibration for 33 components took  7.867441 seconds.
Neural Netwotk with ICA with 33 features took to run 60.744811700002174
ICA with 33 features and 1 clusters took to run 0.9041056999994908
ICA with 33 features and 2 clusters took to run 1.3080895999992208
ICA with 33 features and 3 clusters took to run 2.4427624000018113
ICA with 33 features and 4 clusters took to run 3.3871875000004366
ICA with 33 features and 5 clusters took to run 3.9790084999985993
ICA with 33 features and 6 clusters took to run 3.3618668999988586
ICA with 33 features and 7 clusters took to run 4.2084021999980905
ICA with 33 features and 8 clusters took to run 7.342826500000228
ICA with 33 features and 9 clusters took to run 7.337579099999857
ICA with 33 features and 10 clusters took to run 5.941481800000474
ICA with 33 features and 11 clusters took to run 6.792210899999191
ICA with 33 features and 12 clusters took to run 2.9964473
ICA with 33 features and 13 clusters took to run 3.012651299999561
ICA with 



ICA calibration for 34 components took  1.497948 seconds.
Neural Netwotk with ICA with 34 features took to run 0.04357069999969099
ICA with 34 features and 1 clusters took to run 0.134118000001763
ICA with 34 features and 2 clusters took to run 0.5543641000003845
ICA with 34 features and 3 clusters took to run 0.771585700000287
ICA with 34 features and 4 clusters took to run 1.4985536000021966
ICA with 34 features and 5 clusters took to run 1.324975199997425
ICA with 34 features and 6 clusters took to run 1.7635053000012704
ICA with 34 features and 7 clusters took to run 1.7367388999991817
ICA with 34 features and 8 clusters took to run 2.4787103999988176
ICA with 34 features and 9 clusters took to run 2.264461599999777
ICA with 34 features and 10 clusters took to run 2.5746933999980683
ICA with 34 features and 11 clusters took to run 3.9723865000014484
ICA with 34 features and 12 clusters took to run 4.5834951999968325
ICA with 34 features and 13 clusters took to run 5.154412199997751

In [17]:
print(corpus.shape)
print(len(label_real))

(16000, 35)
16000


In [18]:
#RCA = random_projection.SparseRandomProjection()
#corpus_rca = RCA.fit_transform(corpus)

In [20]:
labels_rca = []
for iteCom in range(1,35):
    t.tic()
    RCA = random_projection.SparseRandomProjection(n_components=iteCom, random_state = 19830526)
    RCA.fit(corpus)
    corpus_rca = RCA.transform(corpus)
    t.toc("RCA calibration for "+str(iteCom)+" components took ")
    dfTestXRCA = RCA.transform(dfTestX)
    dfTrainingXRCA =  RCA.transform(dfTrainingX)
    t.tic()
    nnBP.fit(dfTrainingXRCA, dfTrainingY.values.ravel())
    taux = t.tocvalue()
    dfTestY_Pred = nnBP.predict(dfTestXRCA)
    dfTrainingY_Pred = nnBP.predict(dfTrainingXRCA)
    print("Neural Netwotk with RCA with "+str(iteCom)+" features took to run " + str(taux))
    new_row = pd.DataFrame([["RCA",iteCom,0,accuracy_score(dfTrainingY, dfTrainingY_Pred),accuracy_score(dfTestY, dfTestY_Pred),taux]],columns=["Algorithm","Components","Clusters","AccuracyIn","AccuracyOut","Time"])
    neuralnetwork_sol = pd.concat([neuralnetwork_sol,new_row])
    for ite in range(1,20):
        t.tic()
        gm_rca = GaussianMixture(n_components=ite, random_state=19830526).fit(corpus_rca)   
        gm_label_rca = gm_rca.predict(corpus_rca)
        labels_pca.append(gm_label_rca)
        taux = t.tocvalue()
        print("RCA with "+str(iteCom)+" features and "+str(ite)+" clusters took to run " + str(taux))
        new_row = pd.DataFrame([["RCA",iteCom,ite,homogeneity_score(label_real,gm_label_rca),taux]],columns=["Algorithm","Components","Clusters","Homogeneity","Time"])
        homogeneity_sol = pd.concat([homogeneity_sol,new_row])
        nnBP.fit(pd.get_dummies(gm_label_rca), dfTrainingY.values.ravel())
        taux = t.tocvalue()
        try:
            dfTestY_Pred = nnBP.predict(pd.get_dummies(gm_rca.predict(dfTestXRCA)))
            dfTrainingY_Pred = nnBP.predict(pd.get_dummies(gm_label_rca))
            print("Neural Netwotk with RCA with "+str(iteCom)+" features and "+str(ite)+" clusters took to run " + str(taux))
            new_row = pd.DataFrame([["RCA",iteCom,ite,accuracy_score(dfTrainingY, dfTrainingY_Pred),accuracy_score(dfTestY, dfTestY_Pred),taux]],columns=["Algorithm","Components","Clusters","AccuracyIn","AccuracyOut","Time"])
            neuralnetwork_sol = pd.concat([neuralnetwork_sol,new_row])
        except:
            new_row = pd.DataFrame([["RCA",iteCom,ite,0,0,0]],columns=["Algorithm","Components","Clusters","AccuracyIn","AccuracyOut","Time"])
            neuralnetwork_sol = pd.concat([neuralnetwork_sol,new_row])           

RCA calibration for 1 components took  0.021615 seconds.
Neural Netwotk with RCA with 1 features took to run 0.1207496999995783
RCA with 1 features and 1 clusters took to run 0.14306589999614516
Neural Netwotk with RCA with 1 features and 1 clusters took to run 0.232456199999433
RCA with 1 features and 2 clusters took to run 0.08828630000061821
Neural Netwotk with RCA with 1 features and 2 clusters took to run 0.24420549999922514
RCA with 1 features and 3 clusters took to run 0.6222844000003533
Neural Netwotk with RCA with 1 features and 3 clusters took to run 0.8008312000019941
RCA with 1 features and 4 clusters took to run 0.8376512000031653
Neural Netwotk with RCA with 1 features and 4 clusters took to run 1.0246970000007423
RCA with 1 features and 5 clusters took to run 0.9148862999936682
Neural Netwotk with RCA with 1 features and 5 clusters took to run 1.2642288999995799
RCA with 1 features and 6 clusters took to run 1.0081735999992816
Neural Netwotk with RCA with 1 features and 

RCA with 3 features and 18 clusters took to run 7.180933700001333
Neural Netwotk with RCA with 3 features and 18 clusters took to run 8.118707500005257
RCA with 3 features and 19 clusters took to run 11.713603199998033
Neural Netwotk with RCA with 3 features and 19 clusters took to run 11.873142099997494
RCA calibration for 4 components took  0.052826 seconds.
Neural Netwotk with RCA with 4 features took to run 3.5234200000049896
RCA with 4 features and 1 clusters took to run 0.0820300000050338
Neural Netwotk with RCA with 4 features and 1 clusters took to run 0.195747300000221
RCA with 4 features and 2 clusters took to run 0.26584310000180267
Neural Netwotk with RCA with 4 features and 2 clusters took to run 0.369576800003415
RCA with 4 features and 3 clusters took to run 0.5565491999950609
Neural Netwotk with RCA with 4 features and 3 clusters took to run 0.9392822999943746
RCA with 4 features and 4 clusters took to run 1.1928518999993685
Neural Netwotk with RCA with 4 features and 4

Neural Netwotk with RCA with 6 features and 12 clusters took to run 13.252149399995687
RCA with 6 features and 13 clusters took to run 14.287501999999222
Neural Netwotk with RCA with 6 features and 13 clusters took to run 15.186200099997222
RCA with 6 features and 14 clusters took to run 12.85766010000225
Neural Netwotk with RCA with 6 features and 14 clusters took to run 13.399390800004767
RCA with 6 features and 15 clusters took to run 7.211406599999464
Neural Netwotk with RCA with 6 features and 15 clusters took to run 7.57696869999927
RCA with 6 features and 16 clusters took to run 4.593896900005348
Neural Netwotk with RCA with 6 features and 16 clusters took to run 5.508825000004435
RCA with 6 features and 17 clusters took to run 12.740733999999065
Neural Netwotk with RCA with 6 features and 17 clusters took to run 13.117255800003477
RCA with 6 features and 18 clusters took to run 3.552324399999634
Neural Netwotk with RCA with 6 features and 18 clusters took to run 4.0143734000012

RCA with 9 features and 7 clusters took to run 19.590930500002287
Neural Netwotk with RCA with 9 features and 7 clusters took to run 20.011650499996904
RCA with 9 features and 8 clusters took to run 38.13289650000661
Neural Netwotk with RCA with 9 features and 8 clusters took to run 38.60727400000178
RCA with 9 features and 9 clusters took to run 24.875002000000677
Neural Netwotk with RCA with 9 features and 9 clusters took to run 26.089375800002017
RCA with 9 features and 10 clusters took to run 34.3784713999994
Neural Netwotk with RCA with 9 features and 10 clusters took to run 36.835092199995415
RCA with 9 features and 11 clusters took to run 25.475295899996127
Neural Netwotk with RCA with 9 features and 11 clusters took to run 25.97711849999905
RCA with 9 features and 12 clusters took to run 22.927805100000114
Neural Netwotk with RCA with 9 features and 12 clusters took to run 23.705587899996317
RCA with 9 features and 13 clusters took to run 29.179863899997144
Neural Netwotk with 

Neural Netwotk with RCA with 12 features and 1 clusters took to run 0.19313289999990957
RCA with 12 features and 2 clusters took to run 0.4332140000042273
Neural Netwotk with RCA with 12 features and 2 clusters took to run 0.47580050000397023
RCA with 12 features and 3 clusters took to run 1.335105599995586
Neural Netwotk with RCA with 12 features and 3 clusters took to run 1.6132431000005454
RCA with 12 features and 4 clusters took to run 1.8493154000025243
Neural Netwotk with RCA with 12 features and 4 clusters took to run 2.023333700002695
RCA with 12 features and 5 clusters took to run 26.77539880000404
Neural Netwotk with RCA with 12 features and 5 clusters took to run 27.077629999999772
RCA with 12 features and 6 clusters took to run 29.630732099998568
Neural Netwotk with RCA with 12 features and 6 clusters took to run 29.76511069999833
RCA with 12 features and 7 clusters took to run 49.349646399998164
Neural Netwotk with RCA with 12 features and 7 clusters took to run 49.7384420

MemoryError: Unable to allocate 1.59 MiB for an array with shape (16000, 13) and data type float64

In [None]:
labels_lda = []
   
iteCom = 1
t.tic()
LDA = LinearDiscriminantAnalysis(n_components=iteCom)
LDA.fit(corpus, label_real)
corpus_lda = LDA.transform(corpus)
t.toc("LDA calibration for "+str(iteCom)+" components took ")
dfTestXLDA = LDA.transform(dfTestX)
dfTrainingXLDA =  LDA.transform(dfTrainingX)
t.tic()
nnBP.fit(dfTrainingXLDA, dfTrainingY.values.ravel())
taux = t.tocvalue()
dfTestY_Pred = nnBP.predict(dfTestXLDA)
dfTrainingY_Pred = nnBP.predict(dfTrainingXLDA)
print("Neural Netwotk with LDA with "+str(iteCom)+" features took to run " + str(taux))
new_row = pd.DataFrame([["LDA",iteCom,0,accuracy_score(dfTrainingY, dfTrainingY_Pred),accuracy_score(dfTestY, dfTestY_Pred),taux]],columns=["Algorithm","Components","Clusters","AccuracyIn","AccuracyOut","Time"])
neuralnetwork_sol = pd.concat([neuralnetwork_sol,new_row])

In [25]:

for ite in range(1,20):
        t.tic()
        gm_lda = GaussianMixture(n_components=ite, random_state=19830526).fit(corpus_lda)   
        gm_label_lda = gm_lda.predict(corpus_lda)
        labels_pca.append(gm_label_lda)
        taux = t.tocvalue()
        print("LDA with "+str(iteCom)+" features and "+str(ite)+" clusters took to run " + str(taux))
        new_row = pd.DataFrame([["LDA",iteCom,ite,homogeneity_score(label_real,gm_label_lda),taux]],columns=["Algorithm","Components","Clusters","Homogeneity","Time"])
        homogeneity_sol = pd.concat([homogeneity_sol,new_row])
        nnBP.fit(pd.get_dummies(gm_label_lda), dfTrainingY.values.ravel())
        taux = t.tocvalue()
        try:
            dfTestY_Pred = nnBP.predict(pd.get_dummies(gm_rca.predict(dfTestXLDA)))
            dfTrainingY_Pred = nnBP.predict(pd.get_dummies(gm_label_lda))
            print("Neural Netwotk with LDA with "+str(iteCom)+" features and "+str(ite)+" clusters took to run " + str(taux))
            new_row = pd.DataFrame([["LDA",iteCom,ite,accuracy_score(dfTrainingY, dfTrainingY_Pred),accuracy_score(dfTestY, dfTestY_Pred),taux]],columns=["Algorithm","Components","Clusters","AccuracyIn","AccuracyOut","Time"])
            neuralnetwork_sol = pd.concat([neuralnetwork_sol,new_row])
        except:
            new_row = pd.DataFrame([["LDA",iteCom,ite,0,0,0]],columns=["Algorithm","Components","Clusters","AccuracyIn","AccuracyOut","Time"])
            neuralnetwork_sol = pd.concat([neuralnetwork_sol,new_row])         

LDA with 1 features and 1 clusters took to run 0.0267738000038662
LDA with 1 features and 2 clusters took to run 0.09189830000104848
LDA with 1 features and 3 clusters took to run 0.2346042999997735
LDA with 1 features and 4 clusters took to run 0.2683328999992227
LDA with 1 features and 5 clusters took to run 0.44353530000080355
LDA with 1 features and 6 clusters took to run 0.4209740000005695
LDA with 1 features and 7 clusters took to run 0.633132700000715
LDA with 1 features and 8 clusters took to run 0.3940342999994755
LDA with 1 features and 9 clusters took to run 1.5814243000058923
LDA with 1 features and 10 clusters took to run 1.7090584000034141
LDA with 1 features and 11 clusters took to run 2.111915299996326
LDA with 1 features and 12 clusters took to run 1.4363257000004523
LDA with 1 features and 13 clusters took to run 2.7631215999936103
LDA with 1 features and 14 clusters took to run 4.939558899997792
LDA with 1 features and 15 clusters took to run 5.274703799994313
LDA wi

In [26]:
df = pd.DataFrame(label_real, index=corpus.index ,columns =['Real'])

In [27]:
homogeneity_sol.to_csv("IncomeClusteringResultsEM.csv")
neuralnetwork_sol.to_csv("IncomeClusteringResultsEMNeuralNetwork.csv")

In [None]:
df_ica = pd.DataFrame(labels_ica, columns=corpus.index).T
df_pca = pd.DataFrame(labels_pca, columns=corpus.index).T
df_rca = pd.DataFrame(labels_rca, columns=corpus.index).T
df_lda = pd.DataFrame(labels_lda, columns=corpus.index).T
df_raw = pd.DataFrame(labels, columns=corpus.index).T
df.to_csv("actuals_Income_EM.csv")
df_ica.to_csv("ica_Income_EM.csv")
df_pca.to_csv("pca_Income_EM.csv")
df_rca.to_csv("rca_Income_EM.csv")
df_lda.to_csv("lda_Income_EM.csv")
df_raw.to_csv("raw_Income_EM.csv")