In [2]:
import numpy as np
import os
import pandas as pd
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
# import seaborn as sns
import pickle
import time
import gc
from tqdm import tqdm, tqdm_notebook

%matplotlib inline

#每次可以输出多个变量
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from pylab import rcParams
rcParams['figure.figsize'] = 14, 6

import warnings
warnings.filterwarnings("ignore")

#中文字体
import matplotlib
matplotlib.use('qt4agg')
#指定默认字体
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
matplotlib.rcParams['font.family'] = 'sans-serif'
#解决负号'-'显示为方块的问题
matplotlib.rcParams['axes.unicode_minus'] = False

In [3]:
df = pd.read_excel('./data/data.xlsx')

In [4]:
# 删除缺失值太多的列
drop_col = list(df.isnull().sum()[df.isnull().sum() >= 80000].index)
df = df.drop(drop_col, axis=1)

In [5]:
# 删除描述列
drop_col = [i for i in df.columns if 'txt' in i]
df = df.drop(drop_col, axis=1)

In [6]:
# 填充缺失值

# one-hot类型
df.provstate = df.provstate.fillna('No Provstate')
df.city = df.city.fillna('No City')
df.specificity = df.specificity.fillna(-1)
df.doubtterr = df.doubtterr.fillna(-1)
df.multiple = df.multiple.fillna(-1)
df.targsubtype1 = df.targsubtype1.fillna(-1)
df.corp1 = df.corp1.fillna('No Corp1')
df.target1 = df.target1.fillna('No Target1')
df.natlty1 = df.natlty1.fillna(-1)
df.guncertain1 = df.guncertain1.fillna(-1)
df.weapsubtype1 = df.weapsubtype1.fillna(-1)
df.propextent = df.propextent.fillna(4)
df.ishostkid = df.ishostkid.fillna(-9)

# txt类型
df.location = df.location.fillna('Unknown')
df.summary = df.summary.fillna('Unknown')
df.motive = df.motive.fillna('Unknown')
df.propcomment = df.propcomment.fillna('Unknown')
df.weapdetail = df.weapdetail.fillna('Unknown')
df.scite1 = df.scite1.fillna('Unknown')
df.scite2 = df.scite2.fillna('Unknown')
df.scite3 = df.scite3.fillna('Unknown')

# 连续型
df.latitude = df.latitude.fillna(df.latitude.mean())
df.longitude = df.longitude.fillna(df.longitude.mean())
df.nperps = df.nperps.fillna(int(df.nperps.mean()))
df.nperpcap = df.nperpcap.fillna(int(df.nperpcap.mean()))
df.nkill = df.nkill.fillna(int(df.nkill.mean()))
df.nkillus = df.nkillus.fillna(int(df.nkillus.mean()))
df.nkillter = df.nkillter.fillna(int(df.nkillter.mean()))
df.nwound = df.nwound.fillna(int(df.nwound.mean()))
df.nwoundus = df.nwoundus.fillna(int(df.nwoundus.mean()))
df.nwoundte = df.nwoundte.fillna(int(df.nwoundte.mean()))

In [7]:
df.shape
df.head()

(114183, 56)

Unnamed: 0,eventid,iyear,imonth,iday,extended,country,region,provstate,city,latitude,longitude,specificity,vicinity,location,summary,crit1,crit2,crit3,doubtterr,multiple,success,suicide,attacktype1,targtype1,targsubtype1,corp1,target1,natlty1,gname,motive,guncertain1,individual,nperps,nperpcap,claimed,weaptype1,weapsubtype1,weapdetail,nkill,nkillus,nkillter,nwound,nwoundus,nwoundte,property,propextent,propcomment,ishostkid,scite1,scite2,scite3,dbsource,INT_LOG,INT_IDEO,INT_MISC,INT_ANY
0,199801010001,1998,1,1,0,34,11,Bujumbura Mairie,Bujumbura,-3.375828,29.364107,1.0,0,The incident occurred at Bujumbura Airport.,01/01/1998: Hutu Rebels attacked a Burundi mil...,1,1,1,1.0,0.0,1,0,2,4,34.0,Burundi Military,Burundi Military at Bujumbura Airport,34.0,Hutu extremists,Unknown,0.0,0,100.0,0.0,0,5,5.0,Unknown,104.0,0.0,100.0,6.0,0.0,0.0,-9,4.0,Unknown,0.0,"“Burundi Rebels, Ex-Rwandan Army Soldiers Blam...",“Burundi--Attack Reported on Bujumbura Airport...,Unknown,CETIS,0,1,0,1
1,199801010002,1998,1,1,0,167,9,Moscow (Federal City),Moscow,55.751377,37.579914,1.0,0,Unknown,"01/01/1998: In Russia, a small bomb hidden in ...",1,1,1,0.0,0.0,1,0,3,19,102.0,Moscow Metro System,An underground passage in the Moscow Metro,167.0,Unknown,Unknown,0.0,0,-76.0,0.0,0,6,14.0,Unknown,0.0,0.0,0.0,3.0,0.0,0.0,0,4.0,Unknown,0.0,"“Bomb injures 3 in Moscow subway system,” The ...","“Bomb injures 3 in Moscow subway,” Charleston ...","“Bomb Injures 3 Workers in Moscow Metro,” Los ...",CETIS,-9,-9,0,-9
2,199801010003,1998,1,1,0,603,8,Northern Ireland,Belfast,54.607712,-5.95621,1.0,0,Belfast (Capital City),01/01/1998: The breakaway Loyalist Volunteer F...,1,1,1,0.0,0.0,1,0,2,14,68.0,Civilian,Eddie Trainor,233.0,Loyalist Volunteer Forces (LVF),Unknown,0.0,0,-76.0,0.0,1,5,2.0,Unknown,1.0,0.0,0.0,0.0,0.0,0.0,0,4.0,Unknown,0.0,“Protestant gunmen kill Catholic in New Year's...,“Ulster Peace Shattered by Shooting: Catholic ...,Unknown,CETIS,0,0,1,1
3,199801020001,1998,1,2,0,95,10,Baghdad,Baghdad,33.303566,44.371773,1.0,0,Unknown,01/02/1998: The United Nations Special Commiss...,1,1,1,0.0,0.0,1,0,3,7,47.0,United Nations,UNSCOM Office Building in Baghdad,999.0,Unknown,It was believed this attack was meant to intim...,0.0,0,-76.0,0.0,0,6,11.0,Unknown,0.0,0.0,0.0,0.0,0.0,0.0,1,3.0,The attack caused damage to a security checkpo...,0.0,“Iraq Condemns Attack on UNSCOM Baghdad Office...,"Farouk Choukri , “Iraq, UN Officials Continue ...","“Iraqi Interior Minister on UNSCOM Attack, Kuw...",CETIS,-9,-9,1,1
4,199801020002,1998,1,2,0,155,10,West Bank,Unknown,31.995965,35.27111,4.0,0,Unknown,01/02/1998: An Israeli woman was critically wo...,1,1,1,0.0,0.0,0,0,2,14,71.0,Civilian,Israeli Civilian,97.0,Unknown,Unknown,0.0,0,-76.0,0.0,0,5,2.0,Unknown,0.0,0.0,0.0,1.0,0.0,0.0,0,4.0,Unknown,0.0,"“Woman Shot,” The Philadelphia Inquirer, Janua...",“Israeli Woman Critically Hurt by Gunfire in W...,Unknown,CETIS,-9,-9,0,-9


In [8]:
# 列分类
zeroone_col = ['extended', 'crit1', 'crit2', 'crit3', 'success', 'suicide', 'weapdetail', 'individual']
onehot_col = ['specificity', 'country', 'region', 'vicinity', 'doubtterr', 'multiple',
              'attacktype1', 'targtype1', 'targsubtype1', 'guncertain1', 'weaptype1', 'weapsubtype1', 'property', 'propextent', 
              'ishostkid', 'dbsource', 'natlty1', 'INT_LOG', 'INT_MISC', 'INT_ANY', 'INT_IDEO', 'claimed']
many_onehot_col = ['corp1', 'target1', 'gname', 'provstate', 'city']
txt_col = ['location', 'summary', 'scite1', 'scite2', 'scite3', 'motive', 'propcomment']
cont_col = ['iyear', 'imonth', 'iday', 'latitude', 'longitude', 'nperps', 'nperpcap', 'nkill', 'nkillus', 'nkillter', 'nwound', 
            'nwoundus', 'nwoundte']

full_col = zeroone_col+onehot_col+many_onehot_col+txt_col+cont_col
len(full_col)

55

## one-hot

In [9]:
for i in tqdm_notebook(onehot_col):
    tmp = pd.get_dummies(df[i], prefix=i)
    df = pd.concat([df, tmp], axis=1)
    df = df.drop([i], axis=1)

A Jupyter Widget




In [10]:
df.shape
df.head()

(114183, 648)

Unnamed: 0,eventid,iyear,imonth,iday,extended,provstate,city,latitude,longitude,location,summary,crit1,crit2,crit3,success,suicide,corp1,target1,gname,motive,individual,nperps,nperpcap,weapdetail,nkill,nkillus,nkillter,nwound,nwoundus,nwoundte,propcomment,scite1,scite2,scite3,specificity_-1.0,specificity_1.0,specificity_2.0,specificity_3.0,specificity_4.0,specificity_5.0,country_4,country_5,country_6,country_8,country_11,country_12,country_14,country_15,country_16,country_17,country_18,country_19,country_21,country_22,country_23,country_25,country_26,country_28,country_30,country_32,country_33,country_34,country_35,country_36,country_37,country_38,country_41,country_42,country_43,country_44,country_45,country_47,country_49,country_50,country_51,country_53,country_54,country_55,country_56,country_58,country_59,country_60,country_62,country_63,country_64,country_65,country_67,country_68,country_69,country_72,country_73,country_74,country_75,country_76,country_78,country_83,country_84,country_85,country_86,country_87,country_88,country_89,country_90,country_91,country_92,country_93,country_94,country_95,country_96,country_97,country_98,country_99,country_100,country_101,country_102,country_103,country_104,country_106,country_107,country_108,country_109,country_110,country_111,country_112,country_113,country_117,country_118,country_119,country_120,country_121,country_122,country_123,country_124,country_128,country_130,country_132,country_136,country_137,country_138,country_139,country_141,country_142,country_144,country_145,country_146,country_147,country_151,country_153,country_155,country_156,country_157,country_158,country_159,country_160,country_161,country_162,country_164,country_166,country_167,country_168,country_173,country_174,country_175,country_177,country_179,country_180,country_181,country_182,country_183,country_184,country_185,country_186,country_190,country_195,country_197,country_198,country_199,country_200,country_201,country_202,country_203,country_204,country_205,country_207,country_208,country_209,country_210,country_213,country_214,country_215,country_217,country_218,country_219,country_222,country_223,country_228,country_229,country_230,country_231,country_235,country_347,country_349,country_422,country_603,country_1001,country_1002,country_1003,country_1004,region_1,region_2,region_3,region_4,region_5,region_6,region_7,region_8,region_9,region_10,region_11,region_12,vicinity_-9,vicinity_0,vicinity_1,doubtterr_-1.0,doubtterr_0.0,doubtterr_1.0,multiple_-1.0,multiple_0.0,multiple_1.0,attacktype1_1,attacktype1_2,attacktype1_3,attacktype1_4,attacktype1_5,attacktype1_6,attacktype1_7,attacktype1_8,attacktype1_9,targtype1_1,targtype1_2,targtype1_3,targtype1_4,targtype1_5,targtype1_6,targtype1_7,targtype1_8,targtype1_9,targtype1_10,targtype1_11,targtype1_12,targtype1_13,targtype1_14,targtype1_15,targtype1_16,targtype1_17,targtype1_18,targtype1_19,targtype1_20,targtype1_21,targtype1_22,targsubtype1_-1.0,targsubtype1_1.0,targsubtype1_2.0,targsubtype1_3.0,targsubtype1_4.0,targsubtype1_5.0,targsubtype1_6.0,targsubtype1_7.0,targsubtype1_8.0,targsubtype1_9.0,targsubtype1_10.0,targsubtype1_11.0,targsubtype1_12.0,targsubtype1_13.0,targsubtype1_14.0,targsubtype1_15.0,targsubtype1_16.0,targsubtype1_17.0,targsubtype1_18.0,targsubtype1_19.0,targsubtype1_20.0,targsubtype1_21.0,targsubtype1_22.0,targsubtype1_23.0,targsubtype1_24.0,targsubtype1_25.0,targsubtype1_26.0,targsubtype1_27.0,targsubtype1_28.0,targsubtype1_29.0,targsubtype1_30.0,targsubtype1_31.0,targsubtype1_32.0,targsubtype1_33.0,targsubtype1_34.0,targsubtype1_35.0,targsubtype1_36.0,targsubtype1_37.0,targsubtype1_39.0,targsubtype1_40.0,targsubtype1_41.0,targsubtype1_42.0,targsubtype1_43.0,targsubtype1_44.0,targsubtype1_45.0,targsubtype1_46.0,targsubtype1_47.0,targsubtype1_48.0,targsubtype1_49.0,targsubtype1_50.0,targsubtype1_51.0,targsubtype1_52.0,targsubtype1_53.0,targsubtype1_54.0,targsubtype1_55.0,targsubtype1_56.0,targsubtype1_57.0,targsubtype1_58.0,targsubtype1_59.0,targsubtype1_60.0,targsubtype1_61.0,targsubtype1_62.0,targsubtype1_63.0,targsubtype1_64.0,targsubtype1_65.0,targsubtype1_66.0,targsubtype1_67.0,targsubtype1_68.0,targsubtype1_69.0,targsubtype1_70.0,targsubtype1_71.0,targsubtype1_72.0,targsubtype1_73.0,targsubtype1_74.0,targsubtype1_75.0,targsubtype1_76.0,targsubtype1_77.0,targsubtype1_78.0,targsubtype1_79.0,targsubtype1_80.0,targsubtype1_81.0,targsubtype1_82.0,targsubtype1_83.0,targsubtype1_84.0,targsubtype1_85.0,targsubtype1_86.0,targsubtype1_87.0,targsubtype1_88.0,targsubtype1_89.0,targsubtype1_90.0,targsubtype1_91.0,targsubtype1_92.0,targsubtype1_93.0,targsubtype1_94.0,targsubtype1_95.0,targsubtype1_96.0,targsubtype1_97.0,targsubtype1_98.0,targsubtype1_99.0,targsubtype1_100.0,targsubtype1_101.0,targsubtype1_102.0,targsubtype1_103.0,targsubtype1_104.0,targsubtype1_105.0,targsubtype1_106.0,targsubtype1_107.0,targsubtype1_108.0,targsubtype1_109.0,targsubtype1_110.0,targsubtype1_111.0,targsubtype1_112.0,targsubtype1_113.0,guncertain1_-1.0,guncertain1_0.0,guncertain1_1.0,weaptype1_1,weaptype1_2,weaptype1_3,weaptype1_5,weaptype1_6,weaptype1_7,weaptype1_8,weaptype1_9,weaptype1_10,weaptype1_11,weaptype1_12,weaptype1_13,weapsubtype1_-1.0,weapsubtype1_1.0,weapsubtype1_2.0,weapsubtype1_3.0,weapsubtype1_4.0,weapsubtype1_5.0,weapsubtype1_6.0,weapsubtype1_7.0,weapsubtype1_8.0,weapsubtype1_9.0,weapsubtype1_10.0,weapsubtype1_11.0,weapsubtype1_12.0,weapsubtype1_13.0,weapsubtype1_14.0,weapsubtype1_15.0,weapsubtype1_16.0,weapsubtype1_17.0,weapsubtype1_18.0,weapsubtype1_19.0,weapsubtype1_20.0,weapsubtype1_21.0,weapsubtype1_22.0,weapsubtype1_23.0,weapsubtype1_24.0,weapsubtype1_26.0,weapsubtype1_27.0,weapsubtype1_28.0,weapsubtype1_29.0,weapsubtype1_30.0,weapsubtype1_31.0,property_-9,property_0,property_1,propextent_1.0,propextent_2.0,propextent_3.0,propextent_4.0,ishostkid_-9.0,ishostkid_0.0,ishostkid_1.0,dbsource_Anti-Abortion Project 2010,dbsource_CETIS,dbsource_Eco Project 2010,dbsource_Hewitt Project,dbsource_ISVG,dbsource_Leuprecht Canadian Data,dbsource_START Primary Collection,dbsource_Sageman,dbsource_UMD Algeria 2010-2012,dbsource_UMD Assassinations Project,dbsource_UMD Black Widows 2011,dbsource_UMD Encyclopedia of World Terrorism 2012,dbsource_UMD JTMM Nepal 2012,dbsource_UMD Miscellaneous,dbsource_UMD Schmid 2012,dbsource_UMD Sri Lanka 2011,natlty1_-1.0,natlty1_4.0,natlty1_5.0,natlty1_6.0,natlty1_8.0,natlty1_11.0,natlty1_12.0,natlty1_14.0,natlty1_15.0,natlty1_16.0,natlty1_17.0,natlty1_18.0,natlty1_19.0,natlty1_21.0,natlty1_22.0,natlty1_23.0,natlty1_24.0,natlty1_25.0,natlty1_26.0,natlty1_28.0,natlty1_30.0,natlty1_31.0,natlty1_32.0,natlty1_33.0,natlty1_34.0,natlty1_35.0,natlty1_36.0,natlty1_37.0,natlty1_38.0,natlty1_41.0,natlty1_42.0,natlty1_43.0,natlty1_44.0,natlty1_45.0,natlty1_47.0,natlty1_49.0,natlty1_50.0,natlty1_51.0,natlty1_53.0,natlty1_54.0,natlty1_55.0,natlty1_56.0,natlty1_58.0,natlty1_59.0,natlty1_60.0,natlty1_62.0,natlty1_63.0,natlty1_64.0,natlty1_65.0,natlty1_67.0,natlty1_68.0,natlty1_69.0,natlty1_72.0,natlty1_73.0,natlty1_74.0,natlty1_75.0,natlty1_76.0,natlty1_78.0,natlty1_83.0,natlty1_84.0,natlty1_85.0,natlty1_86.0,natlty1_87.0,natlty1_88.0,natlty1_89.0,natlty1_90.0,natlty1_91.0,natlty1_92.0,natlty1_93.0,natlty1_94.0,natlty1_95.0,natlty1_96.0,natlty1_97.0,natlty1_98.0,natlty1_99.0,natlty1_100.0,natlty1_101.0,natlty1_102.0,natlty1_103.0,natlty1_104.0,natlty1_106.0,natlty1_107.0,natlty1_108.0,natlty1_109.0,natlty1_110.0,natlty1_111.0,natlty1_112.0,natlty1_113.0,natlty1_115.0,natlty1_116.0,natlty1_118.0,natlty1_119.0,natlty1_120.0,natlty1_121.0,natlty1_122.0,natlty1_123.0,natlty1_124.0,natlty1_126.0,natlty1_128.0,natlty1_130.0,natlty1_132.0,natlty1_134.0,natlty1_136.0,natlty1_137.0,natlty1_138.0,natlty1_139.0,natlty1_141.0,natlty1_142.0,natlty1_144.0,natlty1_145.0,natlty1_146.0,natlty1_147.0,natlty1_149.0,natlty1_151.0,natlty1_152.0,natlty1_153.0,natlty1_155.0,natlty1_156.0,natlty1_157.0,natlty1_158.0,natlty1_159.0,natlty1_160.0,natlty1_161.0,natlty1_162.0,natlty1_163.0,natlty1_164.0,natlty1_166.0,natlty1_167.0,natlty1_168.0,natlty1_169.0,natlty1_173.0,natlty1_174.0,natlty1_175.0,natlty1_176.0,natlty1_177.0,natlty1_178.0,natlty1_179.0,natlty1_180.0,natlty1_181.0,natlty1_182.0,natlty1_183.0,natlty1_184.0,natlty1_185.0,natlty1_186.0,natlty1_190.0,natlty1_192.0,natlty1_195.0,natlty1_197.0,natlty1_198.0,natlty1_199.0,natlty1_200.0,natlty1_201.0,natlty1_202.0,natlty1_203.0,natlty1_204.0,natlty1_205.0,natlty1_207.0,natlty1_208.0,natlty1_209.0,natlty1_210.0,natlty1_212.0,natlty1_213.0,natlty1_214.0,natlty1_215.0,natlty1_216.0,natlty1_217.0,natlty1_218.0,natlty1_219.0,natlty1_221.0,natlty1_222.0,natlty1_223.0,natlty1_225.0,natlty1_228.0,natlty1_229.0,natlty1_230.0,natlty1_231.0,natlty1_233.0,natlty1_235.0,natlty1_238.0,natlty1_334.0,natlty1_347.0,natlty1_422.0,natlty1_603.0,natlty1_999.0,natlty1_1001.0,natlty1_1002.0,natlty1_1003.0,natlty1_1004.0,INT_LOG_-9,INT_LOG_0,INT_LOG_1,INT_MISC_-9,INT_MISC_0,INT_MISC_1,INT_ANY_-9,INT_ANY_0,INT_ANY_1,INT_IDEO_-9,INT_IDEO_0,INT_IDEO_1,claimed_-9,claimed_0,claimed_1
0,199801010001,1998,1,1,0,Bujumbura Mairie,Bujumbura,-3.375828,29.364107,The incident occurred at Bujumbura Airport.,01/01/1998: Hutu Rebels attacked a Burundi mil...,1,1,1,1,0,Burundi Military,Burundi Military at Bujumbura Airport,Hutu extremists,Unknown,0,100.0,0.0,Unknown,104.0,0.0,100.0,6.0,0.0,0.0,Unknown,"“Burundi Rebels, Ex-Rwandan Army Soldiers Blam...",“Burundi--Attack Reported on Bujumbura Airport...,Unknown,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0
1,199801010002,1998,1,1,0,Moscow (Federal City),Moscow,55.751377,37.579914,Unknown,"01/01/1998: In Russia, a small bomb hidden in ...",1,1,1,1,0,Moscow Metro System,An underground passage in the Moscow Metro,Unknown,Unknown,0,-76.0,0.0,Unknown,0.0,0.0,0.0,3.0,0.0,0.0,Unknown,"“Bomb injures 3 in Moscow subway system,” The ...","“Bomb injures 3 in Moscow subway,” Charleston ...","“Bomb Injures 3 Workers in Moscow Metro,” Los ...",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,1,0
2,199801010003,1998,1,1,0,Northern Ireland,Belfast,54.607712,-5.95621,Belfast (Capital City),01/01/1998: The breakaway Loyalist Volunteer F...,1,1,1,1,0,Civilian,Eddie Trainor,Loyalist Volunteer Forces (LVF),Unknown,0,-76.0,0.0,Unknown,1.0,0.0,0.0,0.0,0.0,0.0,Unknown,“Protestant gunmen kill Catholic in New Year's...,“Ulster Peace Shattered by Shooting: Catholic ...,Unknown,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1
3,199801020001,1998,1,2,0,Baghdad,Baghdad,33.303566,44.371773,Unknown,01/02/1998: The United Nations Special Commiss...,1,1,1,1,0,United Nations,UNSCOM Office Building in Baghdad,Unknown,It was believed this attack was meant to intim...,0,-76.0,0.0,Unknown,0.0,0.0,0.0,0.0,0.0,0.0,The attack caused damage to a security checkpo...,“Iraq Condemns Attack on UNSCOM Baghdad Office...,"Farouk Choukri , “Iraq, UN Officials Continue ...","“Iraqi Interior Minister on UNSCOM Attack, Kuw...",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0
4,199801020002,1998,1,2,0,West Bank,Unknown,31.995965,35.27111,Unknown,01/02/1998: An Israeli woman was critically wo...,1,1,1,0,0,Civilian,Israeli Civilian,Unknown,Unknown,0,-76.0,0.0,Unknown,0.0,0.0,0.0,1.0,0.0,0.0,Unknown,"“Woman Shot,” The Philadelphia Inquirer, Janua...",“Israeli Woman Critically Hurt by Gunfire in W...,Unknown,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,1,0


## word2vec

In [36]:
import gensim
from gensim.models.keyedvectors import KeyedVectors
from gensim.models import word2vec
from nltk.corpus import stopwords

stop = set(stopwords.words('english'))

In [37]:
model = KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin', binary=True)

In [67]:
col = []

for i in tqdm_notebook(txt_col):
    words = []
    for s in df[i]:
        word = [0] * 300
        w_num = 0
        for w in s.split(' '):
            if w.lower() not in stop and w in model:
                word += model[w]
                w_num += 1
            elif w.lower() not in stop and w.lower() in model:
                word += model[w.lower()]
                w_num += 1
        words.append(list(np.array(word) / w_num))
    col.append(words)

A Jupyter Widget




In [68]:
for i, n in tqdm_notebook(enumerate(txt_col)):
    df[n + 'w2v'] = col[i]

A Jupyter Widget




In [None]:
def w2v_pop(x):
    return x.pop()

for n in tqdm_notebook(txt_col):
    df[n + 'w2v'] = col[i]

In [69]:
df.shape
df.head()

(114183, 655)

Unnamed: 0,eventid,iyear,imonth,iday,extended,provstate,city,latitude,longitude,location,summary,crit1,crit2,crit3,success,suicide,corp1,target1,gname,motive,individual,nperps,nperpcap,weapdetail,nkill,nkillus,nkillter,nwound,nwoundus,nwoundte,propcomment,scite1,scite2,scite3,specificity_-1.0,specificity_1.0,specificity_2.0,specificity_3.0,specificity_4.0,specificity_5.0,country_4,country_5,country_6,country_8,country_11,country_12,country_14,country_15,country_16,country_17,country_18,country_19,country_21,country_22,country_23,country_25,country_26,country_28,country_30,country_32,country_33,country_34,country_35,country_36,country_37,country_38,country_41,country_42,country_43,country_44,country_45,country_47,country_49,country_50,country_51,country_53,country_54,country_55,country_56,country_58,country_59,country_60,country_62,country_63,country_64,country_65,country_67,country_68,country_69,country_72,country_73,country_74,country_75,country_76,country_78,country_83,country_84,country_85,country_86,country_87,country_88,country_89,country_90,country_91,country_92,country_93,country_94,country_95,country_96,country_97,country_98,country_99,country_100,country_101,country_102,country_103,country_104,country_106,country_107,country_108,country_109,country_110,country_111,country_112,country_113,country_117,country_118,country_119,country_120,country_121,country_122,country_123,country_124,country_128,country_130,country_132,country_136,country_137,country_138,country_139,country_141,country_142,country_144,country_145,country_146,country_147,country_151,country_153,country_155,country_156,country_157,country_158,country_159,country_160,country_161,country_162,country_164,country_166,country_167,country_168,country_173,country_174,country_175,country_177,country_179,country_180,country_181,country_182,country_183,country_184,country_185,country_186,country_190,country_195,country_197,country_198,country_199,country_200,country_201,country_202,country_203,country_204,country_205,country_207,country_208,country_209,country_210,country_213,country_214,country_215,country_217,country_218,country_219,country_222,country_223,country_228,country_229,country_230,country_231,country_235,country_347,country_349,country_422,country_603,country_1001,country_1002,country_1003,country_1004,region_1,region_2,region_3,region_4,region_5,region_6,region_7,region_8,region_9,region_10,region_11,region_12,vicinity_-9,vicinity_0,vicinity_1,doubtterr_-1.0,doubtterr_0.0,doubtterr_1.0,multiple_-1.0,multiple_0.0,multiple_1.0,attacktype1_1,attacktype1_2,attacktype1_3,attacktype1_4,attacktype1_5,attacktype1_6,attacktype1_7,attacktype1_8,attacktype1_9,targtype1_1,targtype1_2,targtype1_3,targtype1_4,targtype1_5,targtype1_6,targtype1_7,targtype1_8,targtype1_9,targtype1_10,targtype1_11,targtype1_12,targtype1_13,targtype1_14,targtype1_15,targtype1_16,targtype1_17,targtype1_18,targtype1_19,targtype1_20,targtype1_21,targtype1_22,targsubtype1_-1.0,targsubtype1_1.0,targsubtype1_2.0,targsubtype1_3.0,targsubtype1_4.0,targsubtype1_5.0,targsubtype1_6.0,targsubtype1_7.0,targsubtype1_8.0,targsubtype1_9.0,targsubtype1_10.0,targsubtype1_11.0,targsubtype1_12.0,targsubtype1_13.0,targsubtype1_14.0,targsubtype1_15.0,targsubtype1_16.0,targsubtype1_17.0,targsubtype1_18.0,targsubtype1_19.0,targsubtype1_20.0,targsubtype1_21.0,targsubtype1_22.0,targsubtype1_23.0,targsubtype1_24.0,targsubtype1_25.0,targsubtype1_26.0,targsubtype1_27.0,targsubtype1_28.0,targsubtype1_29.0,targsubtype1_30.0,targsubtype1_31.0,targsubtype1_32.0,targsubtype1_33.0,targsubtype1_34.0,targsubtype1_35.0,targsubtype1_36.0,targsubtype1_37.0,targsubtype1_39.0,targsubtype1_40.0,targsubtype1_41.0,targsubtype1_42.0,targsubtype1_43.0,targsubtype1_44.0,targsubtype1_45.0,targsubtype1_46.0,targsubtype1_47.0,targsubtype1_48.0,targsubtype1_49.0,targsubtype1_50.0,targsubtype1_51.0,targsubtype1_52.0,targsubtype1_53.0,targsubtype1_54.0,targsubtype1_55.0,targsubtype1_56.0,targsubtype1_57.0,targsubtype1_58.0,targsubtype1_59.0,targsubtype1_60.0,targsubtype1_61.0,targsubtype1_62.0,targsubtype1_63.0,targsubtype1_64.0,targsubtype1_65.0,targsubtype1_66.0,targsubtype1_67.0,targsubtype1_68.0,targsubtype1_69.0,targsubtype1_70.0,targsubtype1_71.0,targsubtype1_72.0,targsubtype1_73.0,targsubtype1_74.0,targsubtype1_75.0,targsubtype1_76.0,targsubtype1_77.0,targsubtype1_78.0,targsubtype1_79.0,targsubtype1_80.0,targsubtype1_81.0,targsubtype1_82.0,targsubtype1_83.0,targsubtype1_84.0,targsubtype1_85.0,targsubtype1_86.0,targsubtype1_87.0,targsubtype1_88.0,targsubtype1_89.0,targsubtype1_90.0,targsubtype1_91.0,targsubtype1_92.0,targsubtype1_93.0,targsubtype1_94.0,targsubtype1_95.0,targsubtype1_96.0,targsubtype1_97.0,targsubtype1_98.0,targsubtype1_99.0,targsubtype1_100.0,targsubtype1_101.0,targsubtype1_102.0,targsubtype1_103.0,targsubtype1_104.0,targsubtype1_105.0,targsubtype1_106.0,targsubtype1_107.0,targsubtype1_108.0,targsubtype1_109.0,targsubtype1_110.0,targsubtype1_111.0,targsubtype1_112.0,targsubtype1_113.0,guncertain1_-1.0,guncertain1_0.0,guncertain1_1.0,weaptype1_1,weaptype1_2,weaptype1_3,weaptype1_5,weaptype1_6,weaptype1_7,weaptype1_8,weaptype1_9,weaptype1_10,weaptype1_11,weaptype1_12,weaptype1_13,weapsubtype1_-1.0,weapsubtype1_1.0,weapsubtype1_2.0,weapsubtype1_3.0,weapsubtype1_4.0,weapsubtype1_5.0,weapsubtype1_6.0,weapsubtype1_7.0,weapsubtype1_8.0,weapsubtype1_9.0,weapsubtype1_10.0,weapsubtype1_11.0,weapsubtype1_12.0,weapsubtype1_13.0,weapsubtype1_14.0,weapsubtype1_15.0,weapsubtype1_16.0,weapsubtype1_17.0,weapsubtype1_18.0,weapsubtype1_19.0,weapsubtype1_20.0,weapsubtype1_21.0,weapsubtype1_22.0,weapsubtype1_23.0,weapsubtype1_24.0,weapsubtype1_26.0,weapsubtype1_27.0,weapsubtype1_28.0,weapsubtype1_29.0,weapsubtype1_30.0,weapsubtype1_31.0,property_-9,property_0,property_1,propextent_1.0,propextent_2.0,propextent_3.0,propextent_4.0,ishostkid_-9.0,ishostkid_0.0,ishostkid_1.0,dbsource_Anti-Abortion Project 2010,dbsource_CETIS,dbsource_Eco Project 2010,dbsource_Hewitt Project,dbsource_ISVG,dbsource_Leuprecht Canadian Data,dbsource_START Primary Collection,dbsource_Sageman,dbsource_UMD Algeria 2010-2012,dbsource_UMD Assassinations Project,dbsource_UMD Black Widows 2011,dbsource_UMD Encyclopedia of World Terrorism 2012,dbsource_UMD JTMM Nepal 2012,dbsource_UMD Miscellaneous,dbsource_UMD Schmid 2012,dbsource_UMD Sri Lanka 2011,natlty1_-1.0,natlty1_4.0,natlty1_5.0,natlty1_6.0,natlty1_8.0,natlty1_11.0,natlty1_12.0,natlty1_14.0,natlty1_15.0,natlty1_16.0,natlty1_17.0,natlty1_18.0,natlty1_19.0,natlty1_21.0,natlty1_22.0,natlty1_23.0,natlty1_24.0,natlty1_25.0,natlty1_26.0,natlty1_28.0,natlty1_30.0,natlty1_31.0,natlty1_32.0,natlty1_33.0,natlty1_34.0,natlty1_35.0,natlty1_36.0,natlty1_37.0,natlty1_38.0,natlty1_41.0,natlty1_42.0,natlty1_43.0,natlty1_44.0,natlty1_45.0,natlty1_47.0,natlty1_49.0,natlty1_50.0,natlty1_51.0,natlty1_53.0,natlty1_54.0,natlty1_55.0,natlty1_56.0,natlty1_58.0,natlty1_59.0,natlty1_60.0,natlty1_62.0,natlty1_63.0,natlty1_64.0,natlty1_65.0,natlty1_67.0,natlty1_68.0,natlty1_69.0,natlty1_72.0,natlty1_73.0,natlty1_74.0,natlty1_75.0,natlty1_76.0,natlty1_78.0,natlty1_83.0,natlty1_84.0,natlty1_85.0,natlty1_86.0,natlty1_87.0,natlty1_88.0,natlty1_89.0,natlty1_90.0,natlty1_91.0,natlty1_92.0,natlty1_93.0,natlty1_94.0,natlty1_95.0,natlty1_96.0,natlty1_97.0,natlty1_98.0,natlty1_99.0,natlty1_100.0,natlty1_101.0,natlty1_102.0,natlty1_103.0,natlty1_104.0,natlty1_106.0,natlty1_107.0,natlty1_108.0,natlty1_109.0,natlty1_110.0,natlty1_111.0,natlty1_112.0,natlty1_113.0,natlty1_115.0,natlty1_116.0,natlty1_118.0,natlty1_119.0,natlty1_120.0,natlty1_121.0,natlty1_122.0,natlty1_123.0,natlty1_124.0,natlty1_126.0,natlty1_128.0,natlty1_130.0,natlty1_132.0,natlty1_134.0,natlty1_136.0,natlty1_137.0,natlty1_138.0,natlty1_139.0,natlty1_141.0,natlty1_142.0,natlty1_144.0,natlty1_145.0,natlty1_146.0,natlty1_147.0,natlty1_149.0,natlty1_151.0,natlty1_152.0,natlty1_153.0,natlty1_155.0,natlty1_156.0,natlty1_157.0,natlty1_158.0,natlty1_159.0,natlty1_160.0,natlty1_161.0,natlty1_162.0,natlty1_163.0,natlty1_164.0,natlty1_166.0,natlty1_167.0,natlty1_168.0,natlty1_169.0,natlty1_173.0,natlty1_174.0,natlty1_175.0,natlty1_176.0,natlty1_177.0,natlty1_178.0,natlty1_179.0,natlty1_180.0,natlty1_181.0,natlty1_182.0,natlty1_183.0,natlty1_184.0,natlty1_185.0,natlty1_186.0,natlty1_190.0,natlty1_192.0,natlty1_195.0,natlty1_197.0,natlty1_198.0,natlty1_199.0,natlty1_200.0,natlty1_201.0,natlty1_202.0,natlty1_203.0,natlty1_204.0,natlty1_205.0,natlty1_207.0,natlty1_208.0,natlty1_209.0,natlty1_210.0,natlty1_212.0,natlty1_213.0,natlty1_214.0,natlty1_215.0,natlty1_216.0,natlty1_217.0,natlty1_218.0,natlty1_219.0,natlty1_221.0,natlty1_222.0,natlty1_223.0,natlty1_225.0,natlty1_228.0,natlty1_229.0,natlty1_230.0,natlty1_231.0,natlty1_233.0,natlty1_235.0,natlty1_238.0,natlty1_334.0,natlty1_347.0,natlty1_422.0,natlty1_603.0,natlty1_999.0,natlty1_1001.0,natlty1_1002.0,natlty1_1003.0,natlty1_1004.0,INT_LOG_-9,INT_LOG_0,INT_LOG_1,INT_MISC_-9,INT_MISC_0,INT_MISC_1,INT_ANY_-9,INT_ANY_0,INT_ANY_1,INT_IDEO_-9,INT_IDEO_0,INT_IDEO_1,claimed_-9,claimed_0,claimed_1,locationw2v,summaryw2v,scite1w2v,scite2w2v,scite3w2v,motivew2v,propcommentw2v
0,199801010001,1998,1,1,0,Bujumbura Mairie,Bujumbura,-3.375828,29.364107,The incident occurred at Bujumbura Airport.,01/01/1998: Hutu Rebels attacked a Burundi mil...,1,1,1,1,0,Burundi Military,Burundi Military at Bujumbura Airport,Hutu extremists,Unknown,0,100.0,0.0,Unknown,104.0,0.0,100.0,6.0,0.0,0.0,Unknown,"“Burundi Rebels, Ex-Rwandan Army Soldiers Blam...",“Burundi--Attack Reported on Bujumbura Airport...,Unknown,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,"[-0.05615234375, 0.08138020833333333, 0.194986...","[0.05671275745738636, 0.09612482244318182, 0.1...","[0.10308159722222222, 0.10622151692708333, 0.0...","[0.007039388020833333, 0.15312703450520834, 0....","[0.2373046875, 0.26171875, 0.11865234375, -0.0...","[0.2373046875, 0.26171875, 0.11865234375, -0.0...","[0.2373046875, 0.26171875, 0.11865234375, -0.0..."
1,199801010002,1998,1,1,0,Moscow (Federal City),Moscow,55.751377,37.579914,Unknown,"01/01/1998: In Russia, a small bomb hidden in ...",1,1,1,1,0,Moscow Metro System,An underground passage in the Moscow Metro,Unknown,Unknown,0,-76.0,0.0,Unknown,0.0,0.0,0.0,3.0,0.0,0.0,Unknown,"“Bomb injures 3 in Moscow subway system,” The ...","“Bomb injures 3 in Moscow subway,” Charleston ...","“Bomb Injures 3 Workers in Moscow Metro,” Los ...",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,1,0,"[0.2373046875, 0.26171875, 0.11865234375, -0.0...","[0.05726909637451172, 0.015926361083984375, -0...","[-0.03901018415178571, -0.057926722935267856, ...","[-0.0555419921875, -0.009930928548177084, -0.0...","[0.06563895089285714, 0.009390694754464286, 0....","[0.2373046875, 0.26171875, 0.11865234375, -0.0...","[0.2373046875, 0.26171875, 0.11865234375, -0.0..."
2,199801010003,1998,1,1,0,Northern Ireland,Belfast,54.607712,-5.95621,Belfast (Capital City),01/01/1998: The breakaway Loyalist Volunteer F...,1,1,1,1,0,Civilian,Eddie Trainor,Loyalist Volunteer Forces (LVF),Unknown,0,-76.0,0.0,Unknown,1.0,0.0,0.0,0.0,0.0,0.0,Unknown,“Protestant gunmen kill Catholic in New Year's...,“Ulster Peace Shattered by Shooting: Catholic ...,Unknown,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,1,"[-0.08837890625, 0.1630859375, -0.1513671875, ...","[0.07959626850328948, 0.01355944181743421, 0.0...","[-0.005069732666015625, 0.03141021728515625, 0...","[0.046478271484375, 0.093841552734375, 0.13295...","[0.2373046875, 0.26171875, 0.11865234375, -0.0...","[0.2373046875, 0.26171875, 0.11865234375, -0.0...","[0.2373046875, 0.26171875, 0.11865234375, -0.0..."
3,199801020001,1998,1,2,0,Baghdad,Baghdad,33.303566,44.371773,Unknown,01/02/1998: The United Nations Special Commiss...,1,1,1,1,0,United Nations,UNSCOM Office Building in Baghdad,Unknown,It was believed this attack was meant to intim...,0,-76.0,0.0,Unknown,0.0,0.0,0.0,0.0,0.0,0.0,The attack caused damage to a security checkpo...,“Iraq Condemns Attack on UNSCOM Baghdad Office...,"Farouk Choukri , “Iraq, UN Officials Continue ...","“Iraqi Interior Minister on UNSCOM Attack, Kuw...",0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,"[0.2373046875, 0.26171875, 0.11865234375, -0.0...","[0.0421142578125, 0.011469523111979166, 0.0645...","[0.193994140625, 0.08265380859375, 0.083691406...","[0.07753499348958333, 0.12691752115885416, 0.0...","[-0.050313313802083336, 0.09324137369791667, 0...","[-0.0018157958984375, 0.11676025390625, 0.2587...","[0.0805511474609375, 0.0972900390625, 0.004539..."
4,199801020002,1998,1,2,0,West Bank,Unknown,31.995965,35.27111,Unknown,01/02/1998: An Israeli woman was critically wo...,1,1,1,0,0,Civilian,Israeli Civilian,Unknown,Unknown,0,-76.0,0.0,Unknown,0.0,0.0,0.0,1.0,0.0,0.0,Unknown,"“Woman Shot,” The Philadelphia Inquirer, Janua...",“Israeli Woman Critically Hurt by Gunfire in W...,Unknown,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,1,0,0,0,1,0,"[0.2373046875, 0.26171875, 0.11865234375, -0.0...","[0.10149069393382353, 0.05663344439338235, -0....","[0.03228759765625, 0.013702392578125, -0.04028...","[0.007389068603515625, 0.08745574951171875, -0...","[0.2373046875, 0.26171875, 0.11865234375, -0.0...","[0.2373046875, 0.26171875, 0.11865234375, -0.0...","[0.2373046875, 0.26171875, 0.11865234375, -0.0..."


In [16]:
# class MySentences(object):
#     def __init__(self, dirname):
#         self.dirname = dirname

#     def __iter__(self):
#         for line in open(self.dirname, 'r', encoding='utf-8'):
#             yield line.split()
            
# for i in tqdm_notebook(txt_col):
#     sentences = list(MySentences('./w2v/' + (i) + '.txt'))
#     model = gensim.models.Word2Vec(
#         sentences,
#         sg=1,
#         min_count=5,
#         size=300,
#         hs=1,
#         compute_loss=True,
#         window=30,
#         workers=8,
#         sample=1e-4,
#         iter=10,
#         seed=1024)
#     models.append(model)
#     print(file, 'train success')