# **Setup**

In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import validation_curve,cross_val_score,train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler, MinMaxScaler
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.ensemble import GradientBoostingRegressor
from fancyimpute import KNN, NuclearNormMinimization, SoftImpute, BiScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.metrics.regression import r2_score

import warnings 
warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [None]:
# Load the raw competition data.
train_main = pd.read_csv('train.csv')
test_main = pd.read_csv('test.csv')

# id -> galaxy name; ids are assigned in order of first appearance in the training set.
galaxies_dict = dict(enumerate(train_main['galaxy'].unique()))

# Inverse lookup (name -> id) so every row is encoded with a single hash lookup
# instead of scanning the whole dictionary once per row.
galaxy_to_id = {name: code for code, name in galaxies_dict.items()}

train_main['galaxy'] = train_main['galaxy'].map(galaxy_to_id)

# The test set is encoded with the *training* ids so both frames share one encoding.
test_galaxy_codes = test_main['galaxy'].map(galaxy_to_id)
if test_galaxy_codes.isna().any():
  unseen = test_main.loc[test_galaxy_codes.isna(), 'galaxy'].unique().tolist()
  raise ValueError('test.csv contains galaxies that are absent from train.csv: %s' % unseen)
test_main['galaxy'] = test_galaxy_codes.astype(int)

In [None]:
# Column names to keep from the training frame: the feature columns plus the
# target 'y' (last entry). 'Gender Inequality Index (GII)' is commented out and
# is therefore not part of the selection.
# NOTE(review): presumably consumed by the column selection in the next cell - confirm.
columns = ['galactic year', 'galaxy', 'existence expectancy index',
       'existence expectancy at birth', 'Gross income per capita',
       'Income Index', 'Expected years of education (galactic years)',
       'Mean years of education (galactic years)',
       'Intergalactic Development Index (IDI)', 'Education Index',
       'Intergalactic Development Index (IDI), Rank',
       'Population using at least basic drinking-water services (%)',
       'Population using at least basic sanitation services (%)',
       'Gross capital formation (% of GGP)', 'Population, total (millions)',
       'Population, urban (%)',
       'Mortality rate, under-five (per 1,000 live births)',
       'Mortality rate, infant (per 1,000 live births)',
       'Old age dependency ratio (old age (65 and older) per 100 creatures (ages 15-64))',
       'Population, ages 15–64 (millions)',
       'Population, ages 65 and older (millions)',
       'Life expectancy at birth, male (galactic years)',
       'Life expectancy at birth, female (galactic years)',
       'Population, under age 5 (millions)',
       'Young age (0-14) dependency ratio (per 100 creatures ages 15-64)',
       'Adolescent birth rate (births per 1,000 female creatures ages 15-19)',
       'Total unemployment rate (female to male ratio)',
       'Vulnerable employment (% of total employment)',
       'Unemployment, total (% of labour force)',
       'Employment in agriculture (% of total employment)',
       'Labour force participation rate (% ages 15 and older)',
       'Labour force participation rate (% ages 15 and older), female',
       'Employment in services (% of total employment)',
       'Labour force participation rate (% ages 15 and older), male',
       'Employment to population ratio (% ages 15 and older)',
       'Jungle area (% of total land area)',
       'Share of employment in nonagriculture, female (% of total employment in nonagriculture)',
       'Youth unemployment rate (female to male ratio)',
       'Unemployment, youth (% ages 15–24)',
       'Mortality rate, female grown up (per 1,000 people)',
       'Mortality rate, male grown up (per 1,000 people)',
       'Infants lacking immunization, red hot disease (% of one-galactic year-olds)',
       'Infants lacking immunization, Combination Vaccine (% of one-galactic year-olds)',
       'Gross galactic product (GGP) per capita',
       'Gross galactic product (GGP), total',
       'Outer Galaxies direct investment, net inflows (% of GGP)',
       'Exports and imports (% of GGP)',
       'Share of seats in senate (% held by female)',
       'Natural resource depletion',
       'Mean years of education, female (galactic years)',
       'Mean years of education, male (galactic years)',
       'Expected years of education, female (galactic years)',
       'Expected years of education, male (galactic years)',
       'Maternal mortality ratio (deaths per 100,000 live births)',
       'Renewable energy consumption (% of total final energy consumption)',
       'Estimated gross galactic income per capita, male',
       'Estimated gross galactic income per capita, female',
       'Rural population with access to electricity (%)',
       'Domestic credit provided by financial sector (% of GGP)',
       'Population with at least some secondary education, female (% ages 25 and older)',
       'Population with at least some secondary education, male (% ages 25 and older)',
       'Gross fixed capital formation (% of GGP)',
       'Remittances, inflows (% of GGP)',
       'Population with at least some secondary education (% ages 25 and older)',
       'Intergalactic inbound tourists (thousands)',
       'Gross enrolment ratio, primary (% of primary under-age population)',
       'Respiratory disease incidence (per 100,000 people)',
       'Interstellar phone subscriptions (per 100 people)',
       'Interstellar Data Net users, total (% of population)',
       'Current health expenditure (% of GGP)',
       'Intergalactic Development Index (IDI), female',
       'Intergalactic Development Index (IDI), male',
       'Gender Development Index (GDI)',
       'Intergalactic Development Index (IDI), female, Rank',
       'Intergalactic Development Index (IDI), male, Rank',
       'Adjusted net savings ',
       'Creature Immunodeficiency Disease prevalence, adult (% ages 15-49), total',
       'Private galaxy capital flows (% of GGP)',
      #  'Gender Inequality Index (GII)',
        'y']

In [None]:
# Keep only the whitelisted feature columns plus the target 'y'.
# (The original `train_main[]` had an empty subscript, which is a SyntaxError.)
train_main = train_main[columns]

# **Feature Engineering**

In [None]:
train_main = train_main.drop(['Population, total (millions)','Population, under age 5 (millions)','Population, ages 65 and older (millions)'],axis=1)

In [None]:
rank_col = ['Intergalactic Development Index (IDI), Rank','Intergalactic Development Index (IDI), female, Rank',
       'Intergalactic Development Index (IDI), male, Rank']
sc = StandardScaler()
sc = sc.fit(train_main[rank_col])
train_main[rank_col] = sc.transform(train_main[rank_col])
test_main[rank_col] = sc.transform(test_main[rank_col])

In [None]:
train_main['Population, ages 5-14 (millions)'] = (train_main['Population, total (millions)'] -
                                                 train_main['Population, under age 5 (millions)'] -
                                                 train_main['Population, ages 15–64 (millions)'] -
                                                 train_main['Population, ages 65 and older (millions)'] )   
                                 
test_main['Population, ages 5-14 (millions)'] = (test_main['Population, total (millions)'] -
                                                 test_main['Population, under age 5 (millions)'] -
                                                 test_main['Population, ages 15–64 (millions)'] -
                                                 test_main['Population, ages 65 and older (millions)'] )   
train_main['neg_pop_trend'] = [-1  if x<0 else 1 for x in train_main['Population, ages 5-14 (millions)']]
test_main['neg_pop_trend'] = [-1  if x<0 else 1 for x in test_main['Population, ages 5-14 (millions)']]

In [None]:
train_main['Manual Old age dependency ratio (old age (65 and older) per 100 creatures (ages 15-64))'] = (
                                                 train_main['Population, ages 65 and older (millions)'] /
                                                 train_main['Population, ages 15–64 (millions)']  )  *100   
                                                 
test_main['Manual Old age dependency ratio (old age (65 and older) per 100 creatures (ages 15-64))'] = (
                                                 test_main['Population, ages 65 and older (millions)'] /
                                                 test_main['Population, ages 15–64 (millions)']  )  *100     

train_main.drop('Old age dependency ratio (old age (65 and older) per 100 creatures (ages 15-64))',axis=1,inplace=True)
test_main.drop('Old age dependency ratio (old age (65 and older) per 100 creatures (ages 15-64))',axis=1,inplace=True)

In [None]:
# Map every galactic year present in the data to a linearly decreasing trend
# value: the earliest year gets 2700 and each later year 100 less, down to 100.
years_dict = {
    galactic_year: 2700 - 100 * position
    for position, galactic_year in enumerate([
        990025, 991020, 992016, 993012, 994009, 995006, 996004,
        997002, 998001, 999000, 1000000, 1001000, 1002001, 1003002,
        1004004, 1005006, 1006009, 1007012, 1008016, 1009020, 1010025,
        1011030, 1012036, 1013042, 1014049, 1015056, 1016064,
    ])
}

In [None]:
# Attach the year-trend feature to both frames; years missing from
# years_dict are left unchanged by replace().
for _frame in (train_main, test_main):
  _frame['trend_y'] = _frame['galactic year'].replace(years_dict)

In [None]:
# Hand-assigned code (0-3) per galactic year.
# NOTE(review): presumably a bucket describing how much data is missing in that year - confirm.
null_trend_dict = {
    990025: 2, 991020: 1, 992016: 1, 993012: 1, 994009: 1, 995006: 3, 996004: 1,
    997002: 1, 998001: 1, 999000: 1, 1000000: 3, 1001000: 1, 1002001: 1, 1003002: 1,
    1004004: 1, 1005006: 3, 1006009: 1, 1007012: 0, 1008016: 0, 1009020: 0, 1010025: 2,
    1011030: 2, 1012036: 2, 1013042: 2, 1014049: 2, 1015056: 2, 1016064: 2,
}

In [None]:
# Attach the per-year code from null_trend_dict to both frames; years missing
# from the dictionary are left unchanged by replace().
for _frame in (train_main, test_main):
  _frame['trend_null'] = _frame['galactic year'].replace(null_trend_dict)

# **Train**

In [None]:
def loop(gal,train,test,random_state=None):
  """Fit a gradient-boosting model on one galaxy and predict that galaxy's test rows.

  Parameters
  ----------
  gal : int
      Encoded galaxy id; only rows whose ``galaxy`` column equals it are used.
  train : pandas.DataFrame
      Training rows. Must contain ``galaxy`` and the target column ``y``.
  test : pandas.DataFrame
      Rows to predict. Must contain ``galaxy`` and every feature column that
      is still present in ``train`` after the null handling below.
  random_state : int or None, optional
      Forwarded to ``train_test_split`` and ``GradientBoostingRegressor``.
      The default ``None`` keeps the original unseeded behaviour.

  Returns
  -------
  tuple
      ``(predictions, test_row_labels, validation_rmse)``. When the galaxy has
      fewer than two training rows, ``'opps'`` is printed and the sentinel
      ``(-2, -2.0, -2)`` is returned instead; callers test the third element.
  """
  # Boolean indexing returns new frames, so nothing below touches the caller's data.
  train = train[train.galaxy==gal]
  test = test[test.galaxy==gal]

  # Too few rows to split into train/validation (this also covers a galaxy
  # that does not occur in the training data at all).
  if len(train) < 2:
    print('opps')
    return -2,-2.0,-2

  # Row labels of this galaxy in the full test frame, used by the caller to
  # write the predictions back to the right positions.
  index = test.index

  # Fill nulls with this galaxy's per-column median over train and test together.
  # Assigning the result (instead of inplace=True on a slice) avoids
  # SettingWithCopyWarning.
  galaxy_median = pd.concat([train,test]).median()
  train = train.fillna(galaxy_median)
  test = test.fillna(galaxy_median)

  # Columns that are entirely null for this galaxy have no median and are dropped.
  train = train.dropna(axis=1,how='any')

  y = train['y']
  X = train.drop(['y'],axis=1)

  # Same features, same order, as the training matrix.
  test = test[X.columns]

  X_train,X_val,y_train,y_val = train_test_split(X,y,random_state=random_state)

  # The expansion depends only on the number of columns, so one fitted
  # transformer serves all three matrices.
  poly = PolynomialFeatures(3)
  X_train = poly.fit_transform(X_train)
  X_val = poly.transform(X_val)
  test = poly.transform(test)

  # NOTE(review): loss='lad' was renamed 'absolute_error' in scikit-learn 1.0;
  # switch the name when the environment is upgraded.
  model=GradientBoostingRegressor(loss='lad',learning_rate=0.0001,max_depth=5,n_estimators=10000,
                                  random_state=random_state).fit(X_train,y_train)

  predict_validation = model.predict(X_val)
  error = sqrt(mean_squared_error(y_val,predict_validation))
  predict_test = model.predict(test)

  return predict_test,index.values,error

In [None]:
# Train one model per galaxy and collect its test predictions and validation RMSE.
# Both accumulators are float Series from the start: the original integer
# Series relied on silent upcasting when float predictions were assigned.
galaxy_ids = test_main.galaxy.unique()
predicted_list = pd.Series(0.0, index=test_main.index)
error_list = pd.Series(0.0, index=range(len(galaxy_ids)))
count=0
for gal in galaxy_ids:
  prediction,index,error = loop(int(gal),train_main,test_main)
  if error==-2:
    # sentinel from loop(): too few training rows, predictions stay at 0
    continue
  predicted_list.loc[index] = prediction
  error_list.loc[count] = error
  count=count+1

[1;30;43mStreaming output truncated to the last 5000 lines.[0m

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._update_inplace(new_data)
A value is trying to be set on a copy of a sl

In [None]:
np.mean(error_list[error_list!=0])

In [None]:
predicted_list[predicted_list!=0]

0      0.044579
1      0.042562
2      0.042282
3      0.030889
4      0.034682
         ...   
885    0.037157
886    0.034358
887    0.071342
888    0.062284
889    0.038405
Length: 890, dtype: float64

In [None]:
y_final = predicted_list

In [None]:
# Plain Python list of the predictions, in row order.
y_final = predicted_list.tolist()
y_final

[0.0433237958346801,
 0.040578011612493636,
 0.0402779629018436,
 0.04012666517378877,
 0.024153926952257265,
 0.040441709889047335,
 0.029603360576176074,
 0.04275942586858045,
 0.042570125558423766,
 0.034774607488226306,
 0.03201234429998495,
 0.030012020479992133,
 0.04275942586858045,
 0.04283066121381047,
 0.033285352634748244,
 0.042570125558423766,
 0.032969318927781266,
 0.02312759416770288,
 0.0402779629018436,
 0.04096754865723701,
 0.031921489431722794,
 0.039878205121561316,
 0.03901083646352144,
 0.040441709889047335,
 0.04319955505464751,
 0.038944373292966794,
 0.029603360576176074,
 0.030888553031833025,
 0.044384783027739017,
 0.0433237958346801,
 0.04239308428535332,
 0.040441709889047335,
 0.04283066121381047,
 0.06011468424727734,
 0.029603360576176074,
 0.04114031613809116,
 0.04410873253872,
 0.029603360576176074,
 0.030012020479992133,
 0.04041516431669634,
 0.040787944648606855,
 0.02578843577443396,
 0.040787944648606855,
 0.042570125558423766,
 0.060114684247

# **Part B**

In [None]:
index = np.array(y_final)
pot_inc = -np.log(index+0.01)+3

In [None]:
p2= pot_inc**2

In [None]:
# Submission frame. Built from test_main: the name `test` is only created in a
# later section, so referring to it here breaks a top-to-bottom run.
ss = pd.DataFrame({
    'Index':test_main.index,
    'pred': y_final,
    'opt_pred':0,
    'eei':test_main['existence expectancy index'], # So we can split into low and high EEI galaxies
})

In [None]:
# Peek at the existence expectancy index (all rows except the last three);
# uses test_main because `test` is not defined until a later section.
test_main['existence expectancy index'][:-3]

0      0.456086
1      0.529835
2      0.560976
3      0.565910
4      0.588274
         ...   
882    1.196506
883    1.199043
884    1.250508
885         NaN
886         NaN
Name: existence expectancy index, Length: 887, dtype: float64

In [None]:
ss.head()

Unnamed: 0,Index,pred,opt_pred,eei
0,0,0.043324,0,0.456086
1,1,0.040578,0,0.529835
2,2,0.040278,0,0.560976
3,3,0.040127,0,0.56591
4,4,0.024154,0,0.588274


In [None]:
# Allocate opt_pred by rank of p2 (sorted once instead of once per iteration):
#   * the 400 rows with the largest p2 receive 100 each;
#   * the next 90 rows receive a ramp, 100 for the row ranked just below the
#     top 400 down to 11 for the 490th-ranked row;
#   * every other row keeps 0.
ranked = np.argsort(p2)  # row positions ordered by ascending p2
ss.loc[ranked[-400:], 'opt_pred']=100
ramp_rows = ranked[-490:-400]
if len(ramp_rows):
  # With fewer than 490 rows only the upper end of the ramp is handed out.
  ss.loc[ramp_rows, 'opt_pred'] = np.arange(11, 101)[-len(ramp_rows):]
ss=ss.sort_index()

In [None]:
increase = (ss['opt_pred']*p2)/1000

In [None]:
print(sum(increase), ss.loc[ss.eei < 0.7, 'opt_pred'].sum(), ss['opt_pred'].sum())

1607.1014736994962 6400 44995


In [None]:
ss.to_csv('submission.csv', index=False)

In [None]:
from google.colab import files
files.download("submission.csv")

In [None]:
len(ss.loc[ss.eei < 0.7, 'opt_pred'])

66

# **Train on all rows (no validation split)**

In [None]:
df = pd.DataFrame([[1,2,3],[1,2,3],[1,2,3],[1,2,2]])
df2 = pd.DataFrame([[4,5,6],[4,5,6],[4,5,6]])

pd.concat([df,df2])

Unnamed: 0,0,1,2
0,1,2,3
1,1,2,3
2,1,2,3
3,1,2,2
0,4,5,6
1,4,5,6
2,4,5,6


In [None]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [None]:
# id -> galaxy name; ids are assigned in order of first appearance in the training set.
galaxies_dict = dict(enumerate(train['galaxy'].unique()))

# Encode through the inverse (name -> id) lookup: one hash lookup per row
# instead of scanning the whole dictionary for every row.
train['galaxy'] = train['galaxy'].map({name: code for code, name in galaxies_dict.items()})

In [None]:
# id -> name over the test galaxies. It is not used for the encoding below.
galaxies_dict_test = dict(enumerate(test['galaxy'].unique()))

# The test set is encoded with the *training* dictionary so both frames share
# the same galaxy ids.
test_galaxy_codes = test['galaxy'].map({name: code for code, name in galaxies_dict.items()})
if test_galaxy_codes.isna().any():
  unseen = test.loc[test_galaxy_codes.isna(), 'galaxy'].unique().tolist()
  raise ValueError('test.csv contains galaxies that are absent from train.csv: %s' % unseen)
test['galaxy'] = test_galaxy_codes.astype(int)

In [None]:
def loop3(gal,train,test):
  """Fit a 2-nearest-neighbour regressor on all rows of one galaxy and predict its test rows.

  Unlike ``loop`` there is no validation split, so no error is measured.

  Parameters
  ----------
  gal : int
      Encoded galaxy id; only rows whose ``galaxy`` column equals it are used.
  train : pandas.DataFrame
      Training rows. Must contain ``galaxy`` and the target column ``y``.
  test : pandas.DataFrame
      Rows to predict. Must contain ``galaxy``. It is not modified.

  Returns
  -------
  tuple
      ``(predictions, test_row_labels, 0)``. When the galaxy has fewer than
      three training rows, ``'opps'`` is printed and the sentinel
      ``(-2, -2.0, -2)`` is returned instead; callers test the third element.
  """
  # Boolean indexing returns new frames, so the caller's frames stay untouched
  # (the original added an 'index1' column to the caller's test frame).
  train = train[train.galaxy==gal]
  test = test[test.galaxy==gal]

  # Too few rows to fit on (this also covers a galaxy that does not occur in
  # the training data at all).
  if len(train) < 3:
    print('opps')
    return -2,-2.0,-2

  # Row labels of this galaxy in the full test frame, used by the caller to
  # write the predictions back to the right positions.
  index = test.index

  # Fill nulls with this galaxy's per-column median over train and test
  # features together; the target is excluded, so a missing y is never imputed.
  galaxy_median = pd.concat([train.drop('y',axis=1),test]).median()
  train = train.fillna(galaxy_median).dropna(axis=1,how='all')
  test = test.fillna(galaxy_median)

  y_train = train['y']
  X_train = train.drop(['y'],axis=1)

  # Same features, same order, as the training matrix; this also discards the
  # columns that were entirely null for this galaxy.
  test = test[X_train.columns]

  # The expansion depends only on the number of columns, so one fitted
  # transformer serves both matrices.
  poly = PolynomialFeatures(3)
  X_train = poly.fit_transform(X_train)
  test = poly.transform(test)

  print(X_train.shape,test.shape)

  model=KNeighborsRegressor(2).fit(X_train,y_train)

  predict_test = model.predict(test)

  return predict_test,index.values,0

In [None]:
# Fit one model per galaxy on all of its rows and collect the test predictions.
# Both accumulators are float Series from the start: the original integer
# Series relied on silent upcasting when float predictions were assigned.
galaxy_ids = test.galaxy.unique()
predicted_list = pd.Series(0.0, index=test.index)
error_list = pd.Series(0.0, index=range(len(galaxy_ids)))
count=0
for gal in galaxy_ids:
  prediction,index,error = loop3(int(gal),train,test)
  if error==-2:
    # sentinel from loop3(): too few training rows, predictions stay at 0
    continue
  predicted_list.loc[index] = prediction
  # loop3 has no validation split and always reports 0 here
  error_list.loc[count] = error
  count=count+1

Imputing row 1/19 with 26 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000
(19, 88560) (8, 88560)
Imputing row 1/19 with 39 missing, elapsed time: 0.001
Imputing row 1/7 with 65 missing, elapsed time: 0.000
(19, 88560) (7, 88560)
Imputing row 1/19 with 42 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000
(19, 88560) (8, 88560)
Imputing row 1/24 with 27 missing, elapsed time: 0.001
Imputing row 1/3 with 6 missing, elapsed time: 0.000
(24, 88560) (3, 88560)
Imputing row 1/17 with 27 missing, elapsed time: 0.001
Imputing row 1/10 with 0 missing, elapsed time: 0.000
(17, 88560) (10, 88560)
Imputing row 1/21 with 36 missing, elapsed time: 0.001
Imputing row 1/5 with 65 missing, elapsed time: 0.000
(21, 88560) (5, 88560)
Imputing row 1/21 with 30 missing, elapsed time: 0.001
Imputing row 1/5 with 65 missing, elapsed time: 0.000
(21, 88560) (5, 88560)
Imputing row 1/18 with 29 missing, elapsed time: 0.001
Imputing row 1/9 with



Imputing row 1/2 with 0 missing, elapsed time: 0.000
(24, 88560) (2, 88560)
Imputing row 1/25 with 45 missing, elapsed time: 0.001
Imputing row 1/1 with 15 missing, elapsed time: 0.000
(25, 88560) (1, 88560)
Imputing row 1/18 with 29 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000
(18, 88560) (8, 88560)
Imputing row 1/21 with 27 missing, elapsed time: 0.001
Imputing row 1/5 with 66 missing, elapsed time: 0.000
(21, 88560) (5, 88560)
Imputing row 1/19 with 28 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000
(19, 88560) (8, 88560)
Imputing row 1/19 with 26 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000
(19, 88560) (8, 88560)
Imputing row 1/18 with 30 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000
(18, 88560) (8, 88560)
Imputing row 1/21 with 26 missing, elapsed time: 0.001
Imputing row 1/5 with 65 missing, elapsed time: 0.000
(21, 88560) (5, 88560)




(18, 88560) (8, 88560)
Imputing row 1/23 with 29 missing, elapsed time: 0.001
Imputing row 1/4 with 2 missing, elapsed time: 0.000
(23, 88560) (4, 88560)
Imputing row 1/19 with 38 missing, elapsed time: 0.001
Imputing row 1/7 with 5 missing, elapsed time: 0.000
(19, 88560) (7, 88560)
Imputing row 1/25 with 33 missing, elapsed time: 0.001
Imputing row 1/2 with 5 missing, elapsed time: 0.000
(25, 88560) (2, 88560)
Imputing row 1/24 with 28 missing, elapsed time: 0.001
Imputing row 1/2 with 0 missing, elapsed time: 0.000




(24, 88560) (2, 88560)
Imputing row 1/17 with 27 missing, elapsed time: 0.001
Imputing row 1/10 with 3 missing, elapsed time: 0.001
(17, 88560) (10, 88560)
Imputing row 1/24 with 26 missing, elapsed time: 0.001
Imputing row 1/2 with 0 missing, elapsed time: 0.000
(24, 88560) (2, 88560)
Imputing row 1/18 with 41 missing, elapsed time: 0.001
Imputing row 1/8 with 30 missing, elapsed time: 0.000
(18, 88560) (8, 88560)
Imputing row 1/18 with 30 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000
(18, 88560) (8, 88560)
Imputing row 1/24 with 41 missing, elapsed time: 0.001
Imputing row 1/2 with 2 missing, elapsed time: 0.000
(24, 88560) (2, 88560)
Imputing row 1/19 with 42 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000
(19, 88560) (8, 88560)
Imputing row 1/18 with 39 missing, elapsed time: 0.001
Imputing row 1/8 with 0 missing, elapsed time: 0.000
(18, 88560) (8, 88560)
Imputing row 1/19 with 35 missing, elapsed time: 0.001




Imputing row 1/3 with 3 missing, elapsed time: 0.001
(24, 88560) (3, 88560)
Imputing row 1/19 with 26 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000
(19, 88560) (8, 88560)
Imputing row 1/19 with 40 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.001
(19, 88560) (8, 88560)
Imputing row 1/18 with 27 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000
(18, 88560) (8, 88560)
Imputing row 1/19 with 35 missing, elapsed time: 0.002
Imputing row 1/8 with 2 missing, elapsed time: 0.000
(19, 88560) (8, 88560)
Imputing row 1/25 with 29 missing, elapsed time: 0.001
Imputing row 1/2 with 2 missing, elapsed time: 0.000
(25, 88560) (2, 88560)
Imputing row 1/22 with 34 missing, elapsed time: 0.001
Imputing row 1/5 with 9 missing, elapsed time: 0.000
(22, 88560) (5, 88560)
Imputing row 1/24 with 26 missing, elapsed time: 0.001
Imputing row 1/2 with 0 missing, elapsed time: 0.000
(24, 88560) (2, 88560)
Impu



(21, 88560) (5, 88560)
Imputing row 1/18 with 30 missing, elapsed time: 0.001
Imputing row 1/9 with 65 missing, elapsed time: 0.000
(18, 88560) (9, 88560)
Imputing row 1/20 with 50 missing, elapsed time: 0.001
Imputing row 1/6 with 19 missing, elapsed time: 0.000
(20, 88560) (6, 88560)
Imputing row 1/19 with 28 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000
(19, 88560) (8, 88560)
Imputing row 1/25 with 32 missing, elapsed time: 0.001
Imputing row 1/2 with 3 missing, elapsed time: 0.000
(25, 88560) (2, 88560)
Imputing row 1/22 with 28 missing, elapsed time: 0.001
Imputing row 1/5 with 2 missing, elapsed time: 0.000
(22, 88560) (5, 88560)
Imputing row 1/24 with 40 missing, elapsed time: 0.001
Imputing row 1/2 with 15 missing, elapsed time: 0.000
(24, 88560) (2, 88560)
Imputing row 1/20 with 35 missing, elapsed time: 0.001
Imputing row 1/6 with 0 missing, elapsed time: 0.000
(20, 88560) (6, 88560)
Imputing row 1/24 with 40 missing, elapsed time: 0.001
I



(19, 88560) (7, 88560)
Imputing row 1/24 with 41 missing, elapsed time: 0.001
Imputing row 1/2 with 4 missing, elapsed time: 0.000
(24, 88560) (2, 88560)
Imputing row 1/22 with 32 missing, elapsed time: 0.001
Imputing row 1/5 with 0 missing, elapsed time: 0.000
(22, 88560) (5, 88560)
Imputing row 1/24 with 26 missing, elapsed time: 0.001
Imputing row 1/2 with 0 missing, elapsed time: 0.000
(24, 88560) (2, 88560)
Imputing row 1/25 with 36 missing, elapsed time: 0.001
Imputing row 1/2 with 5 missing, elapsed time: 0.000
(25, 88560) (2, 88560)




Imputing row 1/24 with 42 missing, elapsed time: 0.001
Imputing row 1/3 with 3 missing, elapsed time: 0.000
(24, 88560) (3, 88560)
Imputing row 1/19 with 29 missing, elapsed time: 0.001
Imputing row 1/6 with 5 missing, elapsed time: 0.000
(19, 88560) (6, 88560)
Imputing row 1/21 with 53 missing, elapsed time: 0.001
Imputing row 1/5 with 8 missing, elapsed time: 0.000
(21, 88560) (5, 88560)
Imputing row 1/22 with 28 missing, elapsed time: 0.001
Imputing row 1/5 with 0 missing, elapsed time: 0.000
(22, 88560) (5, 88560)
Imputing row 1/19 with 32 missing, elapsed time: 0.001
Imputing row 1/8 with 15 missing, elapsed time: 0.001
(19, 88560) (8, 88560)
Imputing row 1/24 with 26 missing, elapsed time: 0.001
Imputing row 1/2 with 0 missing, elapsed time: 0.000
(24, 88560) (2, 88560)
Imputing row 1/18 with 28 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000




(18, 88560) (8, 88560)
Imputing row 1/19 with 33 missing, elapsed time: 0.001
Imputing row 1/8 with 0 missing, elapsed time: 0.000
(19, 88560) (8, 88560)
Imputing row 1/24 with 45 missing, elapsed time: 0.001
Imputing row 1/2 with 6 missing, elapsed time: 0.000
(24, 88560) (2, 88560)
Imputing row 1/19 with 29 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000
(19, 88560) (8, 88560)
Imputing row 1/19 with 44 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000
(19, 88560) (8, 88560)
Imputing row 1/22 with 66 missing, elapsed time: 0.001
Imputing row 1/5 with 40 missing, elapsed time: 0.000
(22, 88560) (5, 88560)
Imputing row 1/25 with 34 missing, elapsed time: 0.001
Imputing row 1/2 with 11 missing, elapsed time: 0.000
(25, 88560) (2, 88560)
Imputing row 1/24 with 43 missing, elapsed time: 0.001
Imputing row 1/2 with 0 missing, elapsed time: 0.000




(24, 88560) (2, 88560)
Imputing row 1/18 with 29 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000
(18, 88560) (8, 88560)
Imputing row 1/25 with 26 missing, elapsed time: 0.001
Imputing row 1/2 with 0 missing, elapsed time: 0.000
(25, 88560) (2, 88560)
Imputing row 1/17 with 38 missing, elapsed time: 0.001
Imputing row 1/10 with 2 missing, elapsed time: 0.000
(17, 88560) (10, 88560)
Imputing row 1/24 with 24 missing, elapsed time: 0.001
Imputing row 1/2 with 3 missing, elapsed time: 0.000
(24, 88560) (2, 88560)
Imputing row 1/17 with 28 missing, elapsed time: 0.001
Imputing row 1/10 with 0 missing, elapsed time: 0.001
(17, 88560) (10, 88560)
Imputing row 1/19 with 28 missing, elapsed time: 0.001
Imputing row 1/7 with 65 missing, elapsed time: 0.000
(19, 88560) (7, 88560)
Imputing row 1/17 with 26 missing, elapsed time: 0.001
Imputing row 1/10 with 0 missing, elapsed time: 0.001
(17, 88560) (10, 88560)
Imputing row 1/19 with 29 missing, elapsed time: 0.0



(22, 88560) (5, 88560)
Imputing row 1/25 with 26 missing, elapsed time: 0.001
Imputing row 1/2 with 2 missing, elapsed time: 0.000
(25, 88560) (2, 88560)
Imputing row 1/22 with 27 missing, elapsed time: 0.001
Imputing row 1/5 with 0 missing, elapsed time: 0.000
(22, 88560) (5, 88560)
Imputing row 1/18 with 38 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000
(18, 88560) (8, 88560)
Imputing row 1/24 with 27 missing, elapsed time: 0.001
Imputing row 1/2 with 0 missing, elapsed time: 0.000
(24, 88560) (2, 88560)
Imputing row 1/24 with 43 missing, elapsed time: 0.001
Imputing row 1/2 with 15 missing, elapsed time: 0.000
(24, 88560) (2, 88560)
Imputing row 1/24 with 26 missing, elapsed time: 0.001
Imputing row 1/2 with 1 missing, elapsed time: 0.000
(24, 88560) (2, 88560)
Imputing row 1/19 with 36 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.000
(19, 88560) (8, 88560)
Imputing row 1/24 with 27 missing, elapsed time: 0.001
Im



Imputing row 1/18 with 26 missing, elapsed time: 0.001
Imputing row 1/9 with 0 missing, elapsed time: 0.000
(18, 88560) (9, 88560)
Imputing row 1/25 with 28 missing, elapsed time: 0.001
Imputing row 1/2 with 3 missing, elapsed time: 0.000
(25, 88560) (2, 88560)
Imputing row 1/20 with 26 missing, elapsed time: 0.001
Imputing row 1/6 with 0 missing, elapsed time: 0.000
(20, 88560) (6, 88560)
Imputing row 1/23 with 33 missing, elapsed time: 0.001
Imputing row 1/2 with 5 missing, elapsed time: 0.000
(23, 88560) (2, 88560)
Imputing row 1/25 with 36 missing, elapsed time: 0.001
Imputing row 1/2 with 2 missing, elapsed time: 0.000
(25, 88560) (2, 88560)
Imputing row 1/22 with 26 missing, elapsed time: 0.001
Imputing row 1/5 with 0 missing, elapsed time: 0.001
(22, 88560) (5, 88560)
Imputing row 1/25 with 27 missing, elapsed time: 0.001
Imputing row 1/2 with 2 missing, elapsed time: 0.000
(25, 88560) (2, 88560)
Imputing row 1/19 with 33 missing, elapsed time: 0.001
Imputing row 1/8 with 65 mis



(21, 88560) (5, 88560)
Imputing row 1/19 with 28 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.001
(19, 88560) (8, 88560)
Imputing row 1/24 with 36 missing, elapsed time: 0.001
Imputing row 1/2 with 0 missing, elapsed time: 0.000




(24, 88560) (2, 88560)
Imputing row 1/24 with 26 missing, elapsed time: 0.001
Imputing row 1/2 with 3 missing, elapsed time: 0.000
(24, 88560) (2, 88560)
Imputing row 1/19 with 29 missing, elapsed time: 0.001
Imputing row 1/8 with 2 missing, elapsed time: 0.000
(19, 88560) (8, 88560)
Imputing row 1/19 with 27 missing, elapsed time: 0.001
Imputing row 1/7 with 65 missing, elapsed time: 0.000
(19, 88560) (7, 88560)
Imputing row 1/25 with 27 missing, elapsed time: 0.001
Imputing row 1/2 with 1 missing, elapsed time: 0.000
(25, 88560) (2, 88560)
Imputing row 1/19 with 26 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.001
(19, 88560) (8, 88560)
Imputing row 1/18 with 26 missing, elapsed time: 0.001
Imputing row 1/8 with 0 missing, elapsed time: 0.000
(18, 88560) (8, 88560)
Imputing row 1/18 with 28 missing, elapsed time: 0.001
Imputing row 1/8 with 65 missing, elapsed time: 0.001
(18, 88560) (8, 88560)
Imputing row 1/25 with 28 missing, elapsed time: 0.001
Im



(25, 88560) (1, 88560)
Imputing row 1/25 with 27 missing, elapsed time: 0.001
Imputing row 1/2 with 0 missing, elapsed time: 0.000
(25, 88560) (2, 88560)
Imputing row 1/24 with 26 missing, elapsed time: 0.001
Imputing row 1/2 with 1 missing, elapsed time: 0.000
(24, 88560) (2, 88560)
Imputing row 1/25 with 26 missing, elapsed time: 0.001
Imputing row 1/2 with 1 missing, elapsed time: 0.000
(25, 88560) (2, 88560)
Imputing row 1/25 with 26 missing, elapsed time: 0.001
Imputing row 1/2 with 0 missing, elapsed time: 0.000
(25, 88560) (2, 88560)


In [None]:
error_list

0      0.0
1      0.0
2      0.0
3      0.0
4      0.0
      ... 
167    0.0
168    0.0
169    0.0
170    0.0
171    0.0
Length: 172, dtype: float64

In [None]:
np.mean(error_list[error_list!=0])

nan

In [None]:
predicted_list

0      0.044065
1      0.029603
2      0.029603
3      0.041814
4      0.034682
         ...   
885    0.028160
886    0.034358
887    0.071342
888    0.066937
889    0.037384
Length: 890, dtype: float64

In [None]:
y_final = predicted_list

In [None]:
# Plain Python list of the predictions, in row order.
y_final = predicted_list.tolist()
y_final

[0.044064534161590174,
 0.029603360576176074,
 0.029603360576176074,
 0.04181368502724864,
 0.034681608449540435,
 0.04018604949486299,
 0.029603360576176074,
 0.03840469475725495,
 0.033506232312827566,
 0.034774607488226306,
 0.03092708573354697,
 0.030012020479992133,
 0.03840469475725495,
 0.04283066121381047,
 0.033285352634748244,
 0.033506232312827566,
 0.03817051133613453,
 0.033506232312827566,
 0.029603360576176074,
 0.02623971136997333,
 0.02623971136997333,
 0.04107876203958977,
 0.04524200415602753,
 0.04018604949486299,
 0.04424373367478346,
 0.038944373292966794,
 0.029603360576176074,
 0.03997422127893152,
 0.044384783027739017,
 0.03738388781581853,
 0.033143277481379685,
 0.04018604949486299,
 0.04283066121381047,
 0.04054276822111647,
 0.029603360576176074,
 0.03092708573354697,
 0.042612071711999544,
 0.029603360576176074,
 0.030012020479992133,
 0.03953644220989172,
 0.041547483696661225,
 0.025715844724171877,
 0.041547483696661225,
 0.033506232312827566,
 0.04054

In [None]:
composite_index=['existence expectancy index',
                     'Income Index',
                     'Intergalactic Development Index (IDI)',
                     'Education Index',
                    #  'Intergalactic Development Index (IDI), Rank',
                    #  'Intergalactic Development Index (IDI), female',
                    #  'Intergalactic Development Index (IDI), male',
                     'Gender Development Index (GDI)',
                    #  'Intergalactic Development Index (IDI), female, Rank',
                    #  'Intergalactic Development Index (IDI), male, Rank',
                     'Gender Inequality Index (GII)']