In [1]:
#!pip3 install missing packages

import numpy as np
import re
import requests
import pandas as pd
import seaborn as sns
from functools import reduce

from igel import Igel

import matplotlib.pyplot as plt
from matplotlib import rcParams

from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

## Helper Functions

In [2]:
def clean_dataset(df):
    #assert isinstance(df, pd.DataFrame), "df needs to be a pd.DataFrame"
    #df.dropna(inplace=True)
    df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
    df.replace(np.nan,0)
    indices_to_keep = ~df.isin([np.nan, np.inf, -np.inf]).any(1)
    df = df.replace(r'\D+', '', regex=True)
    return df
    #return df[indices_to_keep].astype(np.float64)
    

def clean_dataset_int(df):
    df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
    df.replace(np.nan,0)
    indices_to_keep = ~df.isin([np.nan, np.inf, -np.inf]).any(1)
    df = df.replace(r'\D+', '', regex=True)
    return df[indices_to_keep].astype(np.float64)

## Mappings

Defining various mappings we'll use when manipulating the data.

In [3]:
priority_scorer = {
    '10' : 'Critical',
    '9' : 'Critical',
    '8' : 'Critical',
    '7' : 'High',
    '6' : 'High',
    '5' : 'Medium',
    '4' : 'Medium',
    '3' : 'Low',
    '2' : 'Low',
    '1.0' : 'Low',
    '0.0' : 'Low',
}

priority_mapping = {
    "Critical" : 10,
    "High" : 7.5,
    "Medium" : 5,
    "Low" : 2.5,
    "Unknown" : 0,
}

# What we consider to be highly important categories of information
highCategoriser = {
    0.0 : 'Other-Advice',
    1.0 : 'Other-Advice',
    2.0 : 'Report-CleanUp',
    3.0 : 'ContextualInformation',
    4.0 : 'Other-ContextualInformation',
    5.0 : 'CallToAction-Donations',
    6.0 : 'Report-EmergingThreats',
    7.0 : 'Report-Factoid',
    8.0 : 'Report-FirstPartyObservation',
    9.0 : 'Request-GoodsServices',
    10.0 : 'Report-Hashtags',
    11.0 : 'Request-InformationWanted',
    12.0 : 'Other-Irrelevant',
    13.0 : 'Report-Location',
    14.0 : 'CallToAction-MovePeople',
    15.0 : 'Report-MultimediaShare',
    16.0 : 'Report-NewSubEvent',
    17.0 : 'Report-News',
    18.0 : 'Report-Official',
    19.0 : 'Report-OriginalEvent',
    20.0 : 'Request-SearchAndRescue',
    21.0 : 'Other-Sentiment',
    22.0 : 'Report-ServiceAvailable',
    23.0 : 'Report-ThirdPartyObservation',
    24.0 : 'CallToAction-Volunteer',
    25.0 : 'Report-Weather',
    26.0 : 'hmm',
}

event_int_map =	{
  "guatemalaEarthquake2012": 7,
  "joplinTornado2011": 16,
  "athensEarthquake2020": 35,
  "baltimoreFlashFlood2020": 36,
  "brooklynBlockPartyShooting2020": 37,
  "daytonOhioShooting2020": 38,
  "elPasoWalmartShooting2020": 39,
  "gilroygarlicShooting2020": 40,
  "hurricaneBarry2020": 41,
  "indonesiaEarthquake2020": 42,
  "keralaFloods2020": 43,
  "myanmarFloods2020": 44,
  "papuaNewguineaEarthquake2020": 45,
  "siberianWildfires2020": 46,
  "typhoonKrosa2020": 47,
  "typhoonLekima2020": 48,
  "whaleyBridgeCollapse2020": 49
}

mymap = {'Advice':1, 'CleanUp':2, 'ContextualInformation':3, 'Discussion':4, 'Donations':5, 
        'EmergingThreats':6, 'Factoid':7, 'FirstPartyObservation':8, 'GoodsServices':9, 'Hashtags':10, 
        'InformationWanted':11,'Irrelevant':12, 'Location':13, 'MovePeople':14, 
         'MultimediaShare':15, 'NewSubEvent':16, 'News':17,
        'Official':18, 'OriginalEvent':19, 'SearchAndRescue':20, 'Sentiment':21, 'ServiceAvailable':22, 
         'ThirdPartyObservation':23,'Volunteer':24, 'Weather':25}

## FeatureAPI

Load the feature vector in from Play

In [4]:
import requests


#url = 'http://localhost:9000/stored_tweets'
import json
def parse_json_stream(stream):
    decoder = json.JSONDecoder()
    while stream:
        obj, idx = decoder.raw_decode(stream)
        yield obj
        stream = stream[idx:].lstrip()

url = 'http://tweetminer-2336003gproject.ida.dcs.gla.ac.uk/stored_tweets'

data = requests.get(url).text  

# delete unused parameters to parse to JSON format
dataFormat = data.replace("\r\n  ","").replace("\r\n","").replace("}\n{", "},\n{").replace(",}", "}").replace("\n", "")

dataFormat = dataFormat.replace(' \\"', " ").replace('\\" ', " ")

# convert to valid structure
dataFormat='{"datas":['+dataFormat+']}'

# convert to JSON format
dataJson = json.loads(dataFormat)

dfNonFormatte= pd.DataFrame(dataJson)

dfNonFormatte

# convert our JSON to DATAFRAM
a = 0
df = []
for element in dfNonFormatte["datas"]:
  df1 = dfNonFormatte["datas"][a] 
  df2 = pd.json_normalize(df1['tweets'])
  df.append(df2)
  a = a + 1

    
# Concat result
df = pd.concat(df, axis=0, join='outer', ignore_index=True)
df

Unnamed: 0,tweet_id,tweet_text,offset,embeddings,sentiment,features.numb_of_urls,features.numb_of_hashtags,features.numb_of_personal_pronouns,features.numb_of_present_tenses,features.weighted_length,features.permillage,features.tweet_created_at,features.tweet_id_str,features.positive_sentiment,features.negative_sentiment,features.numb_of_mentions,features.numb_of_media,features.numb_of_past_tenses,features.numb_of_weird_chars,features.numb_of_questions,features.numb_of_emoticons,features.numb_of_swearing_word,features.numb_of_slang_words,features.numb_of_intensifiers,features.tweet_length,features.userFollowersCount,features.userFriendsCount,features.user_numb_of_tweets,features.user_list_count,features.dict_precision,features.dict_recall,features.dict_f_measure,features.offset,features.is_verified,tfIdf.bc,tfIdf.tsunami,tfIdf.papua,tfIdf.powerful,tfIdf.quake,tfIdf.guinea,tfIdf.earthquake,tfIdf.passed,tfIdf.threat,tfIdf.pacificocean,tfIdf.papuanewguinea,tfIdf.included,tfIdf.newguinea,tfIdf.staysafe,tfIdf.solomonislands,tfIdf.news,tfIdf.kitv,tfIdf.sports,tfIdf.amp,tfIdf.hawaii,tfIdf.channel,tfIdf.weather,tfIdf.honolulu,tfIdf.morning,tfIdf.britain,tfIdf.imagine,tfIdf.islands,tfIdf.engulfing,tfIdf.prayers,tfIdf.sea,tfIdf.hits,tfIdf.ring,tfIdf.alert,tfIdf.magnitude,tfIdf.issued,tfIdf.isolated,tfIdf.reported,tfIdf.hope,tfIdf.preliminary,tfIdf.lifted,tfIdf.warning,tfIdf.app,tfIdf.nothings,tfIdf.strong,tfIdf.earthbound,tfIdf.cmes,tfIdf.strikes,tfIdf.sun,tfIdf.coast,tfIdf.west,tfIdf.summary,tfIdf.kokopo,tfIdf.sse,tfIdf.gisborne,tfIdf.ne,tfIdf.report,tfIdf.zealand,tfIdf.ene,tfIdf.rocked,tfIdf.waves,tfIdf.epicenter,tfIdf.hazardous,tfIdf.coastal,tfIdf.massive,tfIdf.earthquakepapua,tfIdf.ripple,tfIdf.huge,tfIdf.portmoresby,tfIdf.usgs,tfIdf.radius,tfIdf.waved,tfIdf.abound,tfIdf.negative,tfIdf.paupau,tfIdf.earthquakes,tfIdf.magnet,tfIdf.geological,tfIdf.solomon,tfIdf.hit,tfIdf.survey,tfIdf.region,tfIdf.tuesday,tfIdf.rattled,tfIdf.earthquakemag,tfIdf.local,tfIdf.california,tfIdf.tsunamiwarning,tfIdf.read,tfIdf.thenews,tfIdf.cdt,tfIdf.occurred,tfIdf.tue,tfIdf.event,tfIdf.pacific,tfIdf.triggered,tfIdf.breaking,tfIdf.abc,tfIdf.island,tfIdf.residents,tfIdf.rattles,tfIdf.passes,tfIdf.cbc,tfIdf.repent,tfIdf.chance,tfIdf.israel,tfIdf.awakening,tfIdf.happening,tfIdf.god,tfIdf.country,tfIdf.stuck,tfIdf.nearby,tfIdf.triggering,tfIdf.central,tfIdf.nations,tfIdf.killed,tfIdf.subsiding,tfIdf.struck,tfIdf.hours,tfIdf.utc,tfIdf.erthquakes,tfIdf.volcanoes,tfIdf.ago,tfIdf.info,tfIdf.south,tfIdf.center,tfIdf.raised,tfIdf.late,tfIdf.depth,tfIdf.regionsolomon,tfIdf.location,tfIdf.magnitudemb,tfIdf.measured,tfIdf.papuanuevaguinea,tfIdf.northeast,tfIdf.australia,tfIdf.canberra,tfIdf.kilometers,tfIdf.located,tfIdf.evening,tfIdf.destructive,tfIdf.possibility,tfIdf.damage,tfIdf.images,tfIdf.expected,tfIdf.update,tfIdf.coming,tfIdf.papau,tfIdf.pray,tfIdf.fox,tfIdf.papa,tfIdf.animals,tfIdf.minutes,tfIdf.latestcomments,tfIdf.violent,tfIdf.shared,tfIdf.download,tfIdf.android,tfIdf.express,tfIdf.click,tfIdf.indian,tfIdf.expert,tfIdf.wise,tfIdf.hurricanes,tfIdf.bad,tfIdf.parties,tfIdf.hurricane,tfIdf.shots,tfIdf.prompting,tfIdf.prelim,tfIdf.reuters,tfIdf.prompts,tfIdf.lord,tfIdf.safe,tfIdf.expanded,tfIdf.usa,tfIdf.deep,tfIdf.solomons,tfIdf.tidal,tfIdf.risk,tfIdf.wave,tfIdf.overseas,tfIdf.moment,tfIdf.oceania,tfIdf.cancelled,tfIdf.headlines,tfIdf.developing,tfIdf.youtube,tfIdf.birthday,tfIdf.st,tfIdf.watch,tfIdf.israels,tfIdf.tsunamis,tfIdf.advertiser,tfIdf.star,tfIdf.poses,tfIdf.measuring,tfIdf.miles,tfIdf.earthquakeph,tfIdf.shallow,tfIdf.chain,tfIdf.north,tfIdf.knocking,tfIdf.safety,tfIdf.fleeing,tfIdf.sending,tfIdf.power,tfIdf.wake,tfIdf.rabaul,tfIdf.coasts,tfIdf.centre,tfIdf.epicentre,tfIdf.triggers,tfIdf.mag,tfIdf.eqalerts,tfIdf.ireland,tfIdf.newearthquake,...,tfIdf.audition,tfIdf.kurup,tfIdf.mahe,tfIdf.floodsheavy,tfIdf.kanhangad,tfIdf.horanadu,tfIdf.breached,tfIdf.muvattupuzha,tfIdf.mavoor,tfIdf.schoolsafety,tfIdf.schoolsout,tfIdf.nedumbassery,tfIdf.chaired,tfIdf.rajani,tfIdf.incessantly,tfIdf.extremeweatherevents,tfIdf.southwestmonsoon,tfIdf.upward,tfIdf.preyed,tfIdf.nallathanni,tfIdf.avalanche,tfIdf.theburningquestion,tfIdf.udumalai,tfIdf.periyavarrai,tfIdf.providing,tfIdf.tada,tfIdf.repeated,tfIdf.mumbaipuneexpressway,tfIdf.refused,tfIdf.expressway,tfIdf.relent,tfIdf.heatmap,tfIdf.ibms,tfIdf.helpkerala,tfIdf.registrationkerala,tfIdf.wrecking,tfIdf.ravaging,tfIdf.toes,tfIdf.aviationdaily,tfIdf.avgeek,tfIdf.moolamattom,tfIdf.kottamala,tfIdf.ashramam,tfIdf.anim,tfIdf.skymet,tfIdf.tap,tfIdf.mobilises,tfIdf.paralyse,tfIdf.areekode,tfIdf.appaling,tfIdf.succumbed,tfIdf.kannadikkal,tfIdf.newss,tfIdf.livemint,tfIdf.oruvela,tfIdf.laam,tfIdf.adangidich,tfIdf.hostel,tfIdf.surviving,tfIdf.idduki,tfIdf.booked,tfIdf.depressed,tfIdf.kuttiady,tfIdf.adjacent,tfIdf.kasargode,tfIdf.helpforkerala,tfIdf.improvement,tfIdf.kochiinternational,tfIdf.waterloggeded,tfIdf.efficiency,tfIdf.signalling,tfIdf.nofakenews,tfIdf.understandably,tfIdf.ugh,tfIdf.limited,tfIdf.suspending,tfIdf.inaccessible,tfIdf.trivendrum,tfIdf.allepay,tfIdf.kovalam,tfIdf.newsflash,tfIdf.newsbulltein,tfIdf.diaster,tfIdf.taluks,tfIdf.mannarkkud,tfIdf.waybad,tfIdf.chennairain,tfIdf.satara,tfIdf.chiplun,tfIdf.edavanna,tfIdf.ideas,tfIdf.innovative,tfIdf.incapable,tfIdf.leverage,tfIdf.revisit,tfIdf.commentary,tfIdf.columns,tfIdf.defence,tfIdf.wayanads,tfIdf.practise,tfIdf.hotels,tfIdf.lakes,tfIdf.swathes,tfIdf.threadbare,tfIdf.roadblocked,tfIdf.msgs,tfIdf.rejoicing,tfIdf.besafebealert,tfIdf.strongertogether,tfIdf.shahada,tfIdf.nandurbar,tfIdf.borders,tfIdf.reliefment,tfIdf.catastrophe,tfIdf.continous,tfIdf.coimbatorerain,tfIdf.gruesome,tfIdf.gargantuan,tfIdf.mallu,tfIdf.allover,tfIdf.jaisal,tfIdf.talked,tfIdf.thaug,tfIdf.believeinrahul,tfIdf.disunite,tfIdf.malayali,tfIdf.trys,tfIdf.gandhis,tfIdf.responsibilities,tfIdf.kannadigas,tfIdf.chennairains,tfIdf.heavier,tfIdf.brigade,tfIdf.deployment,tfIdf.nil,tfIdf.latestnews,tfIdf.hindinews,tfIdf.idukkinews,tfIdf.indiannews,tfIdf.newsupdate,tfIdf.keralanews,tfIdf.lastyear,tfIdf.hillstations,tfIdf.reminding,tfIdf.oneyearofsurvival,tfIdf.hoilday,tfIdf.delugediaries,tfIdf.examines,tfIdf.brick,tfIdf.uprooting,tfIdf.localised,tfIdf.landless,tfIdf.westerlies,tfIdf.fading,tfIdf.commission,tfIdf.collectors,tfIdf.palani,tfIdf.thenmala,tfIdf.achyuths,tfIdf.kodaikkanal,tfIdf.suryanelli,tfIdf.inter,tfIdf.plzzz,tfIdf.augmenting,tfIdf.aara,tfIdf.chai,tfIdf.muse,tfIdf.nee,tfIdf.toast,tfIdf.pattanamthitta,tfIdf.cwc,tfIdf.prevails,tfIdf.examinations,tfIdf.soldiers,tfIdf.jcos,tfIdf.eco,tfIdf.task,tfIdf.valapatanam,tfIdf.mgmnt,tfIdf.kurumanpuzha,tfIdf.kuthirapuzha,tfIdf.centralwatercommission,tfIdf.officially,tfIdf.informs,tfIdf.apron,tfIdf.addresstothenation,tfIdf.overcoming,tfIdf.generouslykeralafloods,tfIdf.vyanadu,tfIdf.helpe,tfIdf.mundakkai,tfIdf.chooralmala,tfIdf.ayyappa,tfIdf.sanghi,tfIdf.bonkers,tfIdf.communism,tfIdf.sorts,tfIdf.keralafloodlive,tfIdf.kkd,tfIdf.overflowed,tfIdf.ncmc,tfIdf.chairs,tfIdf.heavyrainfall,tfIdf.sinha,tfIdf.kattappana,tfIdf.adimali,tfIdf.medicalcamp,tfIdf.rainrelief,tfIdf.drills,tfIdf.sos,tfIdf.verification,tfIdf.silt,tfIdf.flows,tfIdf.ernakulamleave,tfIdf.tommarow,tfIdf.edn,tfIdf.tributary,tfIdf.peruvammuzhy,tfIdf.ndrfu,tfIdf.pond,tfIdf.artificial,tfIdf.kozikhode,tfIdf.experiences,tfIdf.pooyamkutty,tfIdf.breaching,tfIdf.baiju,tfIdf.manikandanchal,tfIdf.laments,tfIdf.exposed,tfIdf.chennaifloods,tfIdf.mangalamdam,tfIdf.cherukunnapuzha,tfIdf.mangalam,tfIdf.attraction,tfIdf.dstrt,tfIdf.keralano,tfIdf.perinthalmanna,tfIdf.trissur,tfIdf.devendrafadnavis,tfIdf.disasterrelief,tfIdf.maharashtracabinet,tfIdf.jokers,tfIdf.firefighters,tfIdf.kalarkutty,tfIdf.kozikode,tfIdf.kallarkutty,tfIdf.temples,tfIdf.courtyard,tfIdf.assess,tfIdf.adversely
0,1128355711044521984,"[tsunami, threat, included, passed, powerful, ...",62140.0,"{9.324254,0.1151799,2.6183999,9.807115,6.34458...","-0.09 , -0.01 , 0.17 , -0.08 , 0.09 , 0.03 , ...",,6.0,0.0,0.0,195.0,696.0,1.557856e+12,1.128356e+18,0.0,0.0,0.0,0.0,0.0,28.0,0.0,0.0,0.0,0.0,0.0,134.0,3496.0,2215.0,0.0,178.0,0.0,0.0,0.0,0.0,0.0,0.411948,0.080729,0.013757,0.111970,0.098354,0.010751,0.036456,0.341889,0.137446,0.44091,0.212434,0.490421,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1128355449445736448,"[staysafe, papua, newguinea, tsunami, earthqua...",62077.0,"{3.4869452,-0.6036901,0.098899916,4.98685,2.16...","-0.12 , 0.11 , 0.18 , -0.09 , 0.04 , 0.00 , 0...",,6.0,0.0,0.0,100.0,357.0,1.557856e+12,1.128355e+18,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,65.0,5673.0,4996.0,0.0,374.0,0.0,0.0,0.0,0.0,0.0,,0.094184,0.032099,,,,0.042533,,,,,,0.913266,1.144315,0.744666,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1128355212287254531,"[tsunami, threat, hawaii, morning, quake, hono...",62021.0,"{0.25552636,6.876851,-1.0695575,13.627133,4.18...","-0.16 , -0.03 , 0.11 , -0.01 , 0.09 , 0.01 , ...",,0.0,0.0,0.0,155.0,553.0,1.557856e+12,1.128355e+18,0.0,0.0,0.0,0.0,0.0,28.0,0.0,0.0,0.0,0.0,0.0,107.0,1350.0,501.0,0.0,78.0,0.0,0.0,0.0,0.0,0.0,,0.040365,,,0.098354,,,,0.137446,,,,,,,0.19116,0.3914,0.44091,0.510008,0.46891,0.341889,0.312927,0.411948,0.319143,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1128355199448485888,"[imagine, tsunami, engulfing, britain, prayers...",62018.0,"{4.4890995,1.384184,-2.284342,3.0452402,4.2513...","-0.01 , 0.04 , 0.09 , -0.12 , 0.06 , 0.06 , 0...",,0.0,0.0,0.0,110.0,392.0,1.557856e+12,1.128355e+18,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,61.0,11.0,72.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.080729,,,,,,,,,,,,,,,,,,,,,,,0.311966,0.980842,0.334872,0.980842,0.683778,0.750922,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1128355159887753216,"[powerful, magnitude, quake, hits, papua, guin...",62008.0,"{6.17479,-1.0230901,1.5341498,9.082365,5.94773...","-0.12 , -0.08 , -0.04 , 0.07 , -0.01 , 0.02 ,...",,0.0,0.0,0.0,129.0,460.0,1.557856e+12,1.128355e+18,0.0,0.0,1.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,79.0,454.0,357.0,0.0,13.0,0.0,0.0,0.0,0.0,0.0,,0.056511,0.019259,0.156757,0.137695,0.015051,,,,,,,,,,,,,,,,,,,,,,,,,0.20217,0.576728,0.151878,0.104285,0.219306,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13564,1159359542863548421,"[imd, issues, red, alert, idukki, malappuram, ...",138502.0,"{-0.930875,-5.0504575,-8.921433,6.67087,4.7533...","0.03 , 0.12 , 0.12 , -0.13 , -0.01 , 0.07 , 0...",,1.0,0.0,0.0,177.0,632.0,1.565248e+12,1.159360e+18,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0,130.0,186.0,565.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.185941,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
13565,1159358275348942848,"[heavy, rains, kerala, scene, courtyard, palak...",138200.0,"{1.1679858,0.010114923,-3.8072298,1.39957,2.42...","-0.12 , 0.11 , 0.14 , -0.08 , 0.03 , 0.02 , 0...",,2.0,0.0,0.0,98.0,350.0,1.565248e+12,1.159358e+18,0.0,0.0,0.0,1.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,63.0,279.0,306.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.127866,,
13566,1159357325955649536,"[kerala, government, called, level, meet, asse...",137973.0,"{7.228066,3.1975458,-1.3576192,4.8822265,8.643...","-0.17 , -0.12 , 0.04 , 0.06 , 0.09 , -0.04 , ...",,2.0,0.0,0.0,221.0,789.0,1.565247e+12,1.159357e+18,0.0,0.0,1.0,0.0,0.0,32.0,0.0,0.0,0.0,0.0,0.0,145.0,371468.0,537.0,0.0,1359.0,0.0,0.0,0.0,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.526338,
13567,1159354521421619200,"[imd, issues, red, alert, idukki, malappuram, ...",137305.0,"{-0.930875,-5.0504575,-8.921433,6.67087,4.7533...","0.03 , 0.12 , 0.12 , -0.13 , -0.01 , 0.07 , 0...",,1.0,0.0,0.0,177.0,632.0,1.565247e+12,1.159355e+18,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0,130.0,5437419.0,194.0,0.0,5742.0,0.0,0.0,0.0,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.185941,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


# Normalise the sentiment and the embeddings

> Extract the sentiment and embeddings into multiple columns depending on need                                                 

In [5]:
def expand_sentiment(df):
    df['sentiment'] = df[df['sentiment'].apply(lambda x:pd.Series(x))]
    df_sentiment=pd.concat([df['sentiment'].str.split(',', expand=True)], axis=1, keys="s")
    # Join back onto the original dataframe
    df.join(df_sentiment)
    return df
    
def expand_embeddings(df):
    df['embeddings'] = df['embeddings'].str.extract('(\d+)', expand=False)
    df['embeddings'] = df['embeddings'].str[1:]
    df_embeddings = pd.concat([df['embeddings'].str.split(',', expand=True)], axis=1, keys="e")
    df_embeddings
    df.join(df_embeddings) # Join back onto the original dataframe
    #df_emb_sent=df_sentiment.join(df_embeddings)
    #df_emb_sent
    #df.join(df_emb_sent)
    return df

In [6]:
df

Unnamed: 0,tweet_id,tweet_text,offset,embeddings,sentiment,features.numb_of_urls,features.numb_of_hashtags,features.numb_of_personal_pronouns,features.numb_of_present_tenses,features.weighted_length,features.permillage,features.tweet_created_at,features.tweet_id_str,features.positive_sentiment,features.negative_sentiment,features.numb_of_mentions,features.numb_of_media,features.numb_of_past_tenses,features.numb_of_weird_chars,features.numb_of_questions,features.numb_of_emoticons,features.numb_of_swearing_word,features.numb_of_slang_words,features.numb_of_intensifiers,features.tweet_length,features.userFollowersCount,features.userFriendsCount,features.user_numb_of_tweets,features.user_list_count,features.dict_precision,features.dict_recall,features.dict_f_measure,features.offset,features.is_verified,tfIdf.bc,tfIdf.tsunami,tfIdf.papua,tfIdf.powerful,tfIdf.quake,tfIdf.guinea,tfIdf.earthquake,tfIdf.passed,tfIdf.threat,tfIdf.pacificocean,tfIdf.papuanewguinea,tfIdf.included,tfIdf.newguinea,tfIdf.staysafe,tfIdf.solomonislands,tfIdf.news,tfIdf.kitv,tfIdf.sports,tfIdf.amp,tfIdf.hawaii,tfIdf.channel,tfIdf.weather,tfIdf.honolulu,tfIdf.morning,tfIdf.britain,tfIdf.imagine,tfIdf.islands,tfIdf.engulfing,tfIdf.prayers,tfIdf.sea,tfIdf.hits,tfIdf.ring,tfIdf.alert,tfIdf.magnitude,tfIdf.issued,tfIdf.isolated,tfIdf.reported,tfIdf.hope,tfIdf.preliminary,tfIdf.lifted,tfIdf.warning,tfIdf.app,tfIdf.nothings,tfIdf.strong,tfIdf.earthbound,tfIdf.cmes,tfIdf.strikes,tfIdf.sun,tfIdf.coast,tfIdf.west,tfIdf.summary,tfIdf.kokopo,tfIdf.sse,tfIdf.gisborne,tfIdf.ne,tfIdf.report,tfIdf.zealand,tfIdf.ene,tfIdf.rocked,tfIdf.waves,tfIdf.epicenter,tfIdf.hazardous,tfIdf.coastal,tfIdf.massive,tfIdf.earthquakepapua,tfIdf.ripple,tfIdf.huge,tfIdf.portmoresby,tfIdf.usgs,tfIdf.radius,tfIdf.waved,tfIdf.abound,tfIdf.negative,tfIdf.paupau,tfIdf.earthquakes,tfIdf.magnet,tfIdf.geological,tfIdf.solomon,tfIdf.hit,tfIdf.survey,tfIdf.region,tfIdf.tuesday,tfIdf.rattled,tfIdf.earthquakemag,tfIdf.local,tfIdf.california,tfIdf.tsunamiwarning,tfIdf.read,tfIdf.thenews,tfIdf.cdt,tfIdf.occurred,tfIdf.tue,tfIdf.event,tfIdf.pacific,tfIdf.triggered,tfIdf.breaking,tfIdf.abc,tfIdf.island,tfIdf.residents,tfIdf.rattles,tfIdf.passes,tfIdf.cbc,tfIdf.repent,tfIdf.chance,tfIdf.israel,tfIdf.awakening,tfIdf.happening,tfIdf.god,tfIdf.country,tfIdf.stuck,tfIdf.nearby,tfIdf.triggering,tfIdf.central,tfIdf.nations,tfIdf.killed,tfIdf.subsiding,tfIdf.struck,tfIdf.hours,tfIdf.utc,tfIdf.erthquakes,tfIdf.volcanoes,tfIdf.ago,tfIdf.info,tfIdf.south,tfIdf.center,tfIdf.raised,tfIdf.late,tfIdf.depth,tfIdf.regionsolomon,tfIdf.location,tfIdf.magnitudemb,tfIdf.measured,tfIdf.papuanuevaguinea,tfIdf.northeast,tfIdf.australia,tfIdf.canberra,tfIdf.kilometers,tfIdf.located,tfIdf.evening,tfIdf.destructive,tfIdf.possibility,tfIdf.damage,tfIdf.images,tfIdf.expected,tfIdf.update,tfIdf.coming,tfIdf.papau,tfIdf.pray,tfIdf.fox,tfIdf.papa,tfIdf.animals,tfIdf.minutes,tfIdf.latestcomments,tfIdf.violent,tfIdf.shared,tfIdf.download,tfIdf.android,tfIdf.express,tfIdf.click,tfIdf.indian,tfIdf.expert,tfIdf.wise,tfIdf.hurricanes,tfIdf.bad,tfIdf.parties,tfIdf.hurricane,tfIdf.shots,tfIdf.prompting,tfIdf.prelim,tfIdf.reuters,tfIdf.prompts,tfIdf.lord,tfIdf.safe,tfIdf.expanded,tfIdf.usa,tfIdf.deep,tfIdf.solomons,tfIdf.tidal,tfIdf.risk,tfIdf.wave,tfIdf.overseas,tfIdf.moment,tfIdf.oceania,tfIdf.cancelled,tfIdf.headlines,tfIdf.developing,tfIdf.youtube,tfIdf.birthday,tfIdf.st,tfIdf.watch,tfIdf.israels,tfIdf.tsunamis,tfIdf.advertiser,tfIdf.star,tfIdf.poses,tfIdf.measuring,tfIdf.miles,tfIdf.earthquakeph,tfIdf.shallow,tfIdf.chain,tfIdf.north,tfIdf.knocking,tfIdf.safety,tfIdf.fleeing,tfIdf.sending,tfIdf.power,tfIdf.wake,tfIdf.rabaul,tfIdf.coasts,tfIdf.centre,tfIdf.epicentre,tfIdf.triggers,tfIdf.mag,tfIdf.eqalerts,tfIdf.ireland,tfIdf.newearthquake,...,tfIdf.audition,tfIdf.kurup,tfIdf.mahe,tfIdf.floodsheavy,tfIdf.kanhangad,tfIdf.horanadu,tfIdf.breached,tfIdf.muvattupuzha,tfIdf.mavoor,tfIdf.schoolsafety,tfIdf.schoolsout,tfIdf.nedumbassery,tfIdf.chaired,tfIdf.rajani,tfIdf.incessantly,tfIdf.extremeweatherevents,tfIdf.southwestmonsoon,tfIdf.upward,tfIdf.preyed,tfIdf.nallathanni,tfIdf.avalanche,tfIdf.theburningquestion,tfIdf.udumalai,tfIdf.periyavarrai,tfIdf.providing,tfIdf.tada,tfIdf.repeated,tfIdf.mumbaipuneexpressway,tfIdf.refused,tfIdf.expressway,tfIdf.relent,tfIdf.heatmap,tfIdf.ibms,tfIdf.helpkerala,tfIdf.registrationkerala,tfIdf.wrecking,tfIdf.ravaging,tfIdf.toes,tfIdf.aviationdaily,tfIdf.avgeek,tfIdf.moolamattom,tfIdf.kottamala,tfIdf.ashramam,tfIdf.anim,tfIdf.skymet,tfIdf.tap,tfIdf.mobilises,tfIdf.paralyse,tfIdf.areekode,tfIdf.appaling,tfIdf.succumbed,tfIdf.kannadikkal,tfIdf.newss,tfIdf.livemint,tfIdf.oruvela,tfIdf.laam,tfIdf.adangidich,tfIdf.hostel,tfIdf.surviving,tfIdf.idduki,tfIdf.booked,tfIdf.depressed,tfIdf.kuttiady,tfIdf.adjacent,tfIdf.kasargode,tfIdf.helpforkerala,tfIdf.improvement,tfIdf.kochiinternational,tfIdf.waterloggeded,tfIdf.efficiency,tfIdf.signalling,tfIdf.nofakenews,tfIdf.understandably,tfIdf.ugh,tfIdf.limited,tfIdf.suspending,tfIdf.inaccessible,tfIdf.trivendrum,tfIdf.allepay,tfIdf.kovalam,tfIdf.newsflash,tfIdf.newsbulltein,tfIdf.diaster,tfIdf.taluks,tfIdf.mannarkkud,tfIdf.waybad,tfIdf.chennairain,tfIdf.satara,tfIdf.chiplun,tfIdf.edavanna,tfIdf.ideas,tfIdf.innovative,tfIdf.incapable,tfIdf.leverage,tfIdf.revisit,tfIdf.commentary,tfIdf.columns,tfIdf.defence,tfIdf.wayanads,tfIdf.practise,tfIdf.hotels,tfIdf.lakes,tfIdf.swathes,tfIdf.threadbare,tfIdf.roadblocked,tfIdf.msgs,tfIdf.rejoicing,tfIdf.besafebealert,tfIdf.strongertogether,tfIdf.shahada,tfIdf.nandurbar,tfIdf.borders,tfIdf.reliefment,tfIdf.catastrophe,tfIdf.continous,tfIdf.coimbatorerain,tfIdf.gruesome,tfIdf.gargantuan,tfIdf.mallu,tfIdf.allover,tfIdf.jaisal,tfIdf.talked,tfIdf.thaug,tfIdf.believeinrahul,tfIdf.disunite,tfIdf.malayali,tfIdf.trys,tfIdf.gandhis,tfIdf.responsibilities,tfIdf.kannadigas,tfIdf.chennairains,tfIdf.heavier,tfIdf.brigade,tfIdf.deployment,tfIdf.nil,tfIdf.latestnews,tfIdf.hindinews,tfIdf.idukkinews,tfIdf.indiannews,tfIdf.newsupdate,tfIdf.keralanews,tfIdf.lastyear,tfIdf.hillstations,tfIdf.reminding,tfIdf.oneyearofsurvival,tfIdf.hoilday,tfIdf.delugediaries,tfIdf.examines,tfIdf.brick,tfIdf.uprooting,tfIdf.localised,tfIdf.landless,tfIdf.westerlies,tfIdf.fading,tfIdf.commission,tfIdf.collectors,tfIdf.palani,tfIdf.thenmala,tfIdf.achyuths,tfIdf.kodaikkanal,tfIdf.suryanelli,tfIdf.inter,tfIdf.plzzz,tfIdf.augmenting,tfIdf.aara,tfIdf.chai,tfIdf.muse,tfIdf.nee,tfIdf.toast,tfIdf.pattanamthitta,tfIdf.cwc,tfIdf.prevails,tfIdf.examinations,tfIdf.soldiers,tfIdf.jcos,tfIdf.eco,tfIdf.task,tfIdf.valapatanam,tfIdf.mgmnt,tfIdf.kurumanpuzha,tfIdf.kuthirapuzha,tfIdf.centralwatercommission,tfIdf.officially,tfIdf.informs,tfIdf.apron,tfIdf.addresstothenation,tfIdf.overcoming,tfIdf.generouslykeralafloods,tfIdf.vyanadu,tfIdf.helpe,tfIdf.mundakkai,tfIdf.chooralmala,tfIdf.ayyappa,tfIdf.sanghi,tfIdf.bonkers,tfIdf.communism,tfIdf.sorts,tfIdf.keralafloodlive,tfIdf.kkd,tfIdf.overflowed,tfIdf.ncmc,tfIdf.chairs,tfIdf.heavyrainfall,tfIdf.sinha,tfIdf.kattappana,tfIdf.adimali,tfIdf.medicalcamp,tfIdf.rainrelief,tfIdf.drills,tfIdf.sos,tfIdf.verification,tfIdf.silt,tfIdf.flows,tfIdf.ernakulamleave,tfIdf.tommarow,tfIdf.edn,tfIdf.tributary,tfIdf.peruvammuzhy,tfIdf.ndrfu,tfIdf.pond,tfIdf.artificial,tfIdf.kozikhode,tfIdf.experiences,tfIdf.pooyamkutty,tfIdf.breaching,tfIdf.baiju,tfIdf.manikandanchal,tfIdf.laments,tfIdf.exposed,tfIdf.chennaifloods,tfIdf.mangalamdam,tfIdf.cherukunnapuzha,tfIdf.mangalam,tfIdf.attraction,tfIdf.dstrt,tfIdf.keralano,tfIdf.perinthalmanna,tfIdf.trissur,tfIdf.devendrafadnavis,tfIdf.disasterrelief,tfIdf.maharashtracabinet,tfIdf.jokers,tfIdf.firefighters,tfIdf.kalarkutty,tfIdf.kozikode,tfIdf.kallarkutty,tfIdf.temples,tfIdf.courtyard,tfIdf.assess,tfIdf.adversely
0,1128355711044521984,"[tsunami, threat, included, passed, powerful, ...",62140.0,"{9.324254,0.1151799,2.6183999,9.807115,6.34458...","-0.09 , -0.01 , 0.17 , -0.08 , 0.09 , 0.03 , ...",,6.0,0.0,0.0,195.0,696.0,1.557856e+12,1.128356e+18,0.0,0.0,0.0,0.0,0.0,28.0,0.0,0.0,0.0,0.0,0.0,134.0,3496.0,2215.0,0.0,178.0,0.0,0.0,0.0,0.0,0.0,0.411948,0.080729,0.013757,0.111970,0.098354,0.010751,0.036456,0.341889,0.137446,0.44091,0.212434,0.490421,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1128355449445736448,"[staysafe, papua, newguinea, tsunami, earthqua...",62077.0,"{3.4869452,-0.6036901,0.098899916,4.98685,2.16...","-0.12 , 0.11 , 0.18 , -0.09 , 0.04 , 0.00 , 0...",,6.0,0.0,0.0,100.0,357.0,1.557856e+12,1.128355e+18,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,65.0,5673.0,4996.0,0.0,374.0,0.0,0.0,0.0,0.0,0.0,,0.094184,0.032099,,,,0.042533,,,,,,0.913266,1.144315,0.744666,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,1128355212287254531,"[tsunami, threat, hawaii, morning, quake, hono...",62021.0,"{0.25552636,6.876851,-1.0695575,13.627133,4.18...","-0.16 , -0.03 , 0.11 , -0.01 , 0.09 , 0.01 , ...",,0.0,0.0,0.0,155.0,553.0,1.557856e+12,1.128355e+18,0.0,0.0,0.0,0.0,0.0,28.0,0.0,0.0,0.0,0.0,0.0,107.0,1350.0,501.0,0.0,78.0,0.0,0.0,0.0,0.0,0.0,,0.040365,,,0.098354,,,,0.137446,,,,,,,0.19116,0.3914,0.44091,0.510008,0.46891,0.341889,0.312927,0.411948,0.319143,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,1128355199448485888,"[imagine, tsunami, engulfing, britain, prayers...",62018.0,"{4.4890995,1.384184,-2.284342,3.0452402,4.2513...","-0.01 , 0.04 , 0.09 , -0.12 , 0.06 , 0.06 , 0...",,0.0,0.0,0.0,110.0,392.0,1.557856e+12,1.128355e+18,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,61.0,11.0,72.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.080729,,,,,,,,,,,,,,,,,,,,,,,0.311966,0.980842,0.334872,0.980842,0.683778,0.750922,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,1128355159887753216,"[powerful, magnitude, quake, hits, papua, guin...",62008.0,"{6.17479,-1.0230901,1.5341498,9.082365,5.94773...","-0.12 , -0.08 , -0.04 , 0.07 , -0.01 , 0.02 ,...",,0.0,0.0,0.0,129.0,460.0,1.557856e+12,1.128355e+18,0.0,0.0,1.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,79.0,454.0,357.0,0.0,13.0,0.0,0.0,0.0,0.0,0.0,,0.056511,0.019259,0.156757,0.137695,0.015051,,,,,,,,,,,,,,,,,,,,,,,,,0.20217,0.576728,0.151878,0.104285,0.219306,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13564,1159359542863548421,"[imd, issues, red, alert, idukki, malappuram, ...",138502.0,"{-0.930875,-5.0504575,-8.921433,6.67087,4.7533...","0.03 , 0.12 , 0.12 , -0.13 , -0.01 , 0.07 , 0...",,1.0,0.0,0.0,177.0,632.0,1.565248e+12,1.159360e+18,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0,130.0,186.0,565.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.185941,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
13565,1159358275348942848,"[heavy, rains, kerala, scene, courtyard, palak...",138200.0,"{1.1679858,0.010114923,-3.8072298,1.39957,2.42...","-0.12 , 0.11 , 0.14 , -0.08 , 0.03 , 0.02 , 0...",,2.0,0.0,0.0,98.0,350.0,1.565248e+12,1.159358e+18,0.0,0.0,0.0,1.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,63.0,279.0,306.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.127866,,
13566,1159357325955649536,"[kerala, government, called, level, meet, asse...",137973.0,"{7.228066,3.1975458,-1.3576192,4.8822265,8.643...","-0.17 , -0.12 , 0.04 , 0.06 , 0.09 , -0.04 , ...",,2.0,0.0,0.0,221.0,789.0,1.565247e+12,1.159357e+18,0.0,0.0,1.0,0.0,0.0,32.0,0.0,0.0,0.0,0.0,0.0,145.0,371468.0,537.0,0.0,1359.0,0.0,0.0,0.0,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.526338,
13567,1159354521421619200,"[imd, issues, red, alert, idukki, malappuram, ...",137305.0,"{-0.930875,-5.0504575,-8.921433,6.67087,4.7533...","0.03 , 0.12 , 0.12 , -0.13 , -0.01 , 0.07 , 0...",,1.0,0.0,0.0,177.0,632.0,1.565247e+12,1.159355e+18,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0,130.0,5437419.0,194.0,0.0,5742.0,0.0,0.0,0.0,0.0,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.185941,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [7]:
# Group by tweet
feature_vector_input = df.groupby(['tweet_id']).agg('first')
feature_vector_input

Unnamed: 0_level_0,tweet_text,offset,embeddings,sentiment,features.numb_of_urls,features.numb_of_hashtags,features.numb_of_personal_pronouns,features.numb_of_present_tenses,features.weighted_length,features.permillage,features.tweet_created_at,features.tweet_id_str,features.positive_sentiment,features.negative_sentiment,features.numb_of_mentions,features.numb_of_media,features.numb_of_past_tenses,features.numb_of_weird_chars,features.numb_of_questions,features.numb_of_emoticons,features.numb_of_swearing_word,features.numb_of_slang_words,features.numb_of_intensifiers,features.tweet_length,features.userFollowersCount,features.userFriendsCount,features.user_numb_of_tweets,features.user_list_count,features.dict_precision,features.dict_recall,features.dict_f_measure,features.offset,features.is_verified,tfIdf.bc,tfIdf.tsunami,tfIdf.papua,tfIdf.powerful,tfIdf.quake,tfIdf.guinea,tfIdf.earthquake,tfIdf.passed,tfIdf.threat,tfIdf.pacificocean,tfIdf.papuanewguinea,tfIdf.included,tfIdf.newguinea,tfIdf.staysafe,tfIdf.solomonislands,tfIdf.news,tfIdf.kitv,tfIdf.sports,tfIdf.amp,tfIdf.hawaii,tfIdf.channel,tfIdf.weather,tfIdf.honolulu,tfIdf.morning,tfIdf.britain,tfIdf.imagine,tfIdf.islands,tfIdf.engulfing,tfIdf.prayers,tfIdf.sea,tfIdf.hits,tfIdf.ring,tfIdf.alert,tfIdf.magnitude,tfIdf.issued,tfIdf.isolated,tfIdf.reported,tfIdf.hope,tfIdf.preliminary,tfIdf.lifted,tfIdf.warning,tfIdf.app,tfIdf.nothings,tfIdf.strong,tfIdf.earthbound,tfIdf.cmes,tfIdf.strikes,tfIdf.sun,tfIdf.coast,tfIdf.west,tfIdf.summary,tfIdf.kokopo,tfIdf.sse,tfIdf.gisborne,tfIdf.ne,tfIdf.report,tfIdf.zealand,tfIdf.ene,tfIdf.rocked,tfIdf.waves,tfIdf.epicenter,tfIdf.hazardous,tfIdf.coastal,tfIdf.massive,tfIdf.earthquakepapua,tfIdf.ripple,tfIdf.huge,tfIdf.portmoresby,tfIdf.usgs,tfIdf.radius,tfIdf.waved,tfIdf.abound,tfIdf.negative,tfIdf.paupau,tfIdf.earthquakes,tfIdf.magnet,tfIdf.geological,tfIdf.solomon,tfIdf.hit,tfIdf.survey,tfIdf.region,tfIdf.tuesday,tfIdf.rattled,tfIdf.earthquakemag,tfIdf.local,tfIdf.california,tfIdf.tsunamiwarning,tfIdf.read,tfIdf.thenews,tfIdf.cdt,tfIdf.occurred,tfIdf.tue,tfIdf.event,tfIdf.pacific,tfIdf.triggered,tfIdf.breaking,tfIdf.abc,tfIdf.island,tfIdf.residents,tfIdf.rattles,tfIdf.passes,tfIdf.cbc,tfIdf.repent,tfIdf.chance,tfIdf.israel,tfIdf.awakening,tfIdf.happening,tfIdf.god,tfIdf.country,tfIdf.stuck,tfIdf.nearby,tfIdf.triggering,tfIdf.central,tfIdf.nations,tfIdf.killed,tfIdf.subsiding,tfIdf.struck,tfIdf.hours,tfIdf.utc,tfIdf.erthquakes,tfIdf.volcanoes,tfIdf.ago,tfIdf.info,tfIdf.south,tfIdf.center,tfIdf.raised,tfIdf.late,tfIdf.depth,tfIdf.regionsolomon,tfIdf.location,tfIdf.magnitudemb,tfIdf.measured,tfIdf.papuanuevaguinea,tfIdf.northeast,tfIdf.australia,tfIdf.canberra,tfIdf.kilometers,tfIdf.located,tfIdf.evening,tfIdf.destructive,tfIdf.possibility,tfIdf.damage,tfIdf.images,tfIdf.expected,tfIdf.update,tfIdf.coming,tfIdf.papau,tfIdf.pray,tfIdf.fox,tfIdf.papa,tfIdf.animals,tfIdf.minutes,tfIdf.latestcomments,tfIdf.violent,tfIdf.shared,tfIdf.download,tfIdf.android,tfIdf.express,tfIdf.click,tfIdf.indian,tfIdf.expert,tfIdf.wise,tfIdf.hurricanes,tfIdf.bad,tfIdf.parties,tfIdf.hurricane,tfIdf.shots,tfIdf.prompting,tfIdf.prelim,tfIdf.reuters,tfIdf.prompts,tfIdf.lord,tfIdf.safe,tfIdf.expanded,tfIdf.usa,tfIdf.deep,tfIdf.solomons,tfIdf.tidal,tfIdf.risk,tfIdf.wave,tfIdf.overseas,tfIdf.moment,tfIdf.oceania,tfIdf.cancelled,tfIdf.headlines,tfIdf.developing,tfIdf.youtube,tfIdf.birthday,tfIdf.st,tfIdf.watch,tfIdf.israels,tfIdf.tsunamis,tfIdf.advertiser,tfIdf.star,tfIdf.poses,tfIdf.measuring,tfIdf.miles,tfIdf.earthquakeph,tfIdf.shallow,tfIdf.chain,tfIdf.north,tfIdf.knocking,tfIdf.safety,tfIdf.fleeing,tfIdf.sending,tfIdf.power,tfIdf.wake,tfIdf.rabaul,tfIdf.coasts,tfIdf.centre,tfIdf.epicentre,tfIdf.triggers,tfIdf.mag,tfIdf.eqalerts,tfIdf.ireland,tfIdf.newearthquake,tfIdf.namatanai,...,tfIdf.audition,tfIdf.kurup,tfIdf.mahe,tfIdf.floodsheavy,tfIdf.kanhangad,tfIdf.horanadu,tfIdf.breached,tfIdf.muvattupuzha,tfIdf.mavoor,tfIdf.schoolsafety,tfIdf.schoolsout,tfIdf.nedumbassery,tfIdf.chaired,tfIdf.rajani,tfIdf.incessantly,tfIdf.extremeweatherevents,tfIdf.southwestmonsoon,tfIdf.upward,tfIdf.preyed,tfIdf.nallathanni,tfIdf.avalanche,tfIdf.theburningquestion,tfIdf.udumalai,tfIdf.periyavarrai,tfIdf.providing,tfIdf.tada,tfIdf.repeated,tfIdf.mumbaipuneexpressway,tfIdf.refused,tfIdf.expressway,tfIdf.relent,tfIdf.heatmap,tfIdf.ibms,tfIdf.helpkerala,tfIdf.registrationkerala,tfIdf.wrecking,tfIdf.ravaging,tfIdf.toes,tfIdf.aviationdaily,tfIdf.avgeek,tfIdf.moolamattom,tfIdf.kottamala,tfIdf.ashramam,tfIdf.anim,tfIdf.skymet,tfIdf.tap,tfIdf.mobilises,tfIdf.paralyse,tfIdf.areekode,tfIdf.appaling,tfIdf.succumbed,tfIdf.kannadikkal,tfIdf.newss,tfIdf.livemint,tfIdf.oruvela,tfIdf.laam,tfIdf.adangidich,tfIdf.hostel,tfIdf.surviving,tfIdf.idduki,tfIdf.booked,tfIdf.depressed,tfIdf.kuttiady,tfIdf.adjacent,tfIdf.kasargode,tfIdf.helpforkerala,tfIdf.improvement,tfIdf.kochiinternational,tfIdf.waterloggeded,tfIdf.efficiency,tfIdf.signalling,tfIdf.nofakenews,tfIdf.understandably,tfIdf.ugh,tfIdf.limited,tfIdf.suspending,tfIdf.inaccessible,tfIdf.trivendrum,tfIdf.allepay,tfIdf.kovalam,tfIdf.newsflash,tfIdf.newsbulltein,tfIdf.diaster,tfIdf.taluks,tfIdf.mannarkkud,tfIdf.waybad,tfIdf.chennairain,tfIdf.satara,tfIdf.chiplun,tfIdf.edavanna,tfIdf.ideas,tfIdf.innovative,tfIdf.incapable,tfIdf.leverage,tfIdf.revisit,tfIdf.commentary,tfIdf.columns,tfIdf.defence,tfIdf.wayanads,tfIdf.practise,tfIdf.hotels,tfIdf.lakes,tfIdf.swathes,tfIdf.threadbare,tfIdf.roadblocked,tfIdf.msgs,tfIdf.rejoicing,tfIdf.besafebealert,tfIdf.strongertogether,tfIdf.shahada,tfIdf.nandurbar,tfIdf.borders,tfIdf.reliefment,tfIdf.catastrophe,tfIdf.continous,tfIdf.coimbatorerain,tfIdf.gruesome,tfIdf.gargantuan,tfIdf.mallu,tfIdf.allover,tfIdf.jaisal,tfIdf.talked,tfIdf.thaug,tfIdf.believeinrahul,tfIdf.disunite,tfIdf.malayali,tfIdf.trys,tfIdf.gandhis,tfIdf.responsibilities,tfIdf.kannadigas,tfIdf.chennairains,tfIdf.heavier,tfIdf.brigade,tfIdf.deployment,tfIdf.nil,tfIdf.latestnews,tfIdf.hindinews,tfIdf.idukkinews,tfIdf.indiannews,tfIdf.newsupdate,tfIdf.keralanews,tfIdf.lastyear,tfIdf.hillstations,tfIdf.reminding,tfIdf.oneyearofsurvival,tfIdf.hoilday,tfIdf.delugediaries,tfIdf.examines,tfIdf.brick,tfIdf.uprooting,tfIdf.localised,tfIdf.landless,tfIdf.westerlies,tfIdf.fading,tfIdf.commission,tfIdf.collectors,tfIdf.palani,tfIdf.thenmala,tfIdf.achyuths,tfIdf.kodaikkanal,tfIdf.suryanelli,tfIdf.inter,tfIdf.plzzz,tfIdf.augmenting,tfIdf.aara,tfIdf.chai,tfIdf.muse,tfIdf.nee,tfIdf.toast,tfIdf.pattanamthitta,tfIdf.cwc,tfIdf.prevails,tfIdf.examinations,tfIdf.soldiers,tfIdf.jcos,tfIdf.eco,tfIdf.task,tfIdf.valapatanam,tfIdf.mgmnt,tfIdf.kurumanpuzha,tfIdf.kuthirapuzha,tfIdf.centralwatercommission,tfIdf.officially,tfIdf.informs,tfIdf.apron,tfIdf.addresstothenation,tfIdf.overcoming,tfIdf.generouslykeralafloods,tfIdf.vyanadu,tfIdf.helpe,tfIdf.mundakkai,tfIdf.chooralmala,tfIdf.ayyappa,tfIdf.sanghi,tfIdf.bonkers,tfIdf.communism,tfIdf.sorts,tfIdf.keralafloodlive,tfIdf.kkd,tfIdf.overflowed,tfIdf.ncmc,tfIdf.chairs,tfIdf.heavyrainfall,tfIdf.sinha,tfIdf.kattappana,tfIdf.adimali,tfIdf.medicalcamp,tfIdf.rainrelief,tfIdf.drills,tfIdf.sos,tfIdf.verification,tfIdf.silt,tfIdf.flows,tfIdf.ernakulamleave,tfIdf.tommarow,tfIdf.edn,tfIdf.tributary,tfIdf.peruvammuzhy,tfIdf.ndrfu,tfIdf.pond,tfIdf.artificial,tfIdf.kozikhode,tfIdf.experiences,tfIdf.pooyamkutty,tfIdf.breaching,tfIdf.baiju,tfIdf.manikandanchal,tfIdf.laments,tfIdf.exposed,tfIdf.chennaifloods,tfIdf.mangalamdam,tfIdf.cherukunnapuzha,tfIdf.mangalam,tfIdf.attraction,tfIdf.dstrt,tfIdf.keralano,tfIdf.perinthalmanna,tfIdf.trissur,tfIdf.devendrafadnavis,tfIdf.disasterrelief,tfIdf.maharashtracabinet,tfIdf.jokers,tfIdf.firefighters,tfIdf.kalarkutty,tfIdf.kozikode,tfIdf.kallarkutty,tfIdf.temples,tfIdf.courtyard,tfIdf.assess,tfIdf.adversely
tweet_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1,Unnamed: 358_level_1,Unnamed: 359_level_1,Unnamed: 360_level_1,Unnamed: 361_level_1,Unnamed: 362_level_1,Unnamed: 363_level_1,Unnamed: 364_level_1,Unnamed: 365_level_1,Unnamed: 366_level_1,Unnamed: 367_level_1,Unnamed: 368_level_1,Unnamed: 369_level_1,Unnamed: 370_level_1,Unnamed: 371_level_1,Unnamed: 372_level_1,Unnamed: 373_level_1,Unnamed: 374_level_1,Unnamed: 375_level_1,Unnamed: 376_level_1,Unnamed: 377_level_1,Unnamed: 378_level_1,Unnamed: 379_level_1,Unnamed: 380_level_1,Unnamed: 381_level_1,Unnamed: 382_level_1,Unnamed: 383_level_1,Unnamed: 384_level_1,Unnamed: 385_level_1,Unnamed: 386_level_1,Unnamed: 387_level_1,Unnamed: 388_level_1,Unnamed: 389_level_1,Unnamed: 390_level_1,Unnamed: 391_level_1,Unnamed: 392_level_1,Unnamed: 393_level_1,Unnamed: 394_level_1,Unnamed: 395_level_1,Unnamed: 396_level_1,Unnamed: 397_level_1,Unnamed: 398_level_1,Unnamed: 399_level_1,Unnamed: 400_level_1,Unnamed: 401_level_1,Unnamed: 402_level_1,Unnamed: 403_level_1,Unnamed: 404_level_1,Unnamed: 405_level_1,Unnamed: 406_level_1,Unnamed: 407_level_1,Unnamed: 408_level_1,Unnamed: 409_level_1,Unnamed: 410_level_1,Unnamed: 411_level_1,Unnamed: 412_level_1,Unnamed: 413_level_1,Unnamed: 414_level_1,Unnamed: 415_level_1,Unnamed: 416_level_1,Unnamed: 417_level_1,Unnamed: 418_level_1,Unnamed: 419_level_1,Unnamed: 420_level_1,Unnamed: 421_level_1,Unnamed: 422_level_1,Unnamed: 423_level_1,Unnamed: 424_level_1,Unnamed: 425_level_1,Unnamed: 426_level_1,Unnamed: 427_level_1,Unnamed: 428_level_1,Unnamed: 429_level_1,Unnamed: 430_level_1,Unnamed: 431_level_1,Unnamed: 432_level_1,Unnamed: 433_level_1,Unnamed: 434_level_1,Unnamed: 435_level_1,Unnamed: 436_level_1,Unnamed: 437_level_1,Unnamed: 438_level_1,Unnamed: 439_level_1,Unnamed: 440_level_1,Unnamed: 441_level_1,Unnamed: 442_level_1,Unnamed: 443_level_1,Unnamed: 444_level_1,Unnamed: 445_level_1,Unnamed: 446_level_1,Unnamed: 447_level_1,Unnamed: 448_level_1,Unnamed: 449_level_1,Unnamed: 450_level_1,Unnamed: 451_level_1,Unnamed: 452_level_1,Unnamed: 453_level_1,Unnamed: 454_level_1,Unnamed: 455_level_1,Unnamed: 456_level_1,Unnamed: 457_level_1,Unnamed: 458_level_1,Unnamed: 459_level_1,Unnamed: 460_level_1,Unnamed: 461_level_1,Unnamed: 462_level_1,Unnamed: 463_level_1,Unnamed: 464_level_1,Unnamed: 465_level_1,Unnamed: 466_level_1,Unnamed: 467_level_1,Unnamed: 468_level_1,Unnamed: 469_level_1,Unnamed: 470_level_1,Unnamed: 471_level_1,Unnamed: 472_level_1,Unnamed: 473_level_1,Unnamed: 474_level_1,Unnamed: 475_level_1,Unnamed: 476_level_1,Unnamed: 477_level_1,Unnamed: 478_level_1,Unnamed: 479_level_1,Unnamed: 480_level_1,Unnamed: 481_level_1,Unnamed: 482_level_1,Unnamed: 483_level_1,Unnamed: 484_level_1,Unnamed: 485_level_1,Unnamed: 486_level_1,Unnamed: 487_level_1,Unnamed: 488_level_1,Unnamed: 489_level_1,Unnamed: 490_level_1,Unnamed: 491_level_1,Unnamed: 492_level_1,Unnamed: 493_level_1,Unnamed: 494_level_1,Unnamed: 495_level_1,Unnamed: 496_level_1,Unnamed: 497_level_1,Unnamed: 498_level_1,Unnamed: 499_level_1,Unnamed: 500_level_1,Unnamed: 501_level_1
1128095078063284224,"[weeks, earthquake, bulolo, disrupted, png, ne...",0.0,"{1.7933619,0.06316902,2.3596811,7.2150307,4.83...","-0.02 , -0.02 , 0.10 , -0.08 , 0.12 , 0.02 , ...",,1.0,0.0,0.0,189.0,675.0,1.557794e+12,1.128095e+18,0.0,0.0,1.0,0.0,0.0,26.0,0.0,0.0,0.0,0.0,0.0,116.0,3537.0,712.0,0.0,70.0,0.0,0.0,0.0,0.0,0.0,,,,,,,0.019630,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1128250422572015616,"[ses, networks, png, dataco, restore, connecti...",37037.0,"{4.9856396,-0.08365916,2.2851202,9.376361,2.81...","-0.06 , -0.05 , 0.06 , -0.07 , 0.12 , -0.04 ,...",,0.0,0.0,0.0,136.0,485.0,1.557831e+12,1.128250e+18,0.0,0.0,0.0,1.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,88.0,1524.0,102.0,0.0,64.0,0.0,0.0,0.0,0.0,0.0,,,0.019259,,,0.015051,0.025520,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1128285344506494977,"[earthquake, britain, region, mins, ago, utc, ...",45363.0,"{0.9927401,2.302358,0.978428,3.2636597,2.48808...","0.16 , -0.01 , 0.02 , 0.13 , 0.00 , 0.28 , 0....",,1.0,0.0,0.0,129.0,460.0,1.557839e+12,1.128285e+18,0.0,0.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,59.0,5849.0,45.0,0.0,84.0,0.0,0.0,0.0,0.0,0.0,,,,,,,0.031899,,,,,,,,,,,,,,,,,,0.27297,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.48377,,,,,,,,,,,,,,,,,,,,,,,,,0.304384,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.366758,,,0.400291,0.441711,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1128285344548442112,"[major, mag, earthquake, britain, region, tues...",45363.0,"{2.6244092,1.9538361,-0.06411208,3.4310098,2.2...","-0.04 , 0.13 , 0.11 , -0.11 , -0.00 , 0.07 , ...",,1.0,0.0,0.0,139.0,496.0,1.557839e+12,1.128285e+18,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,57.0,7452.0,70.0,0.0,252.0,0.0,0.0,0.0,0.0,0.0,,,,,,,0.042533,,,,,,,,,,,,,,,,,,0.36396,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.405846,0.476426,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.653575,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1128285482784366592,"[earthquake, britain, region, tue, utc, minute...",45396.0,"{2.1470141,6.699149,2.3303783,7.55179,4.187389...","0.17 , 0.04 , -0.00 , 0.21 , -0.03 , 0.34 , -...",,0.0,0.0,0.0,219.0,782.0,1.557839e+12,1.128285e+18,0.0,0.0,0.0,0.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,92.0,4416.0,4860.0,0.0,29.0,0.0,0.0,0.0,0.0,0.0,,,,,,,0.021266,,,,,,,,,,,,0.297505,,,,,,0.18198,,,,,,,,,,,,,,,,,0.336056,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.202923,,,,,,,,,,,0.30065,,,,,,,,,,,,,,,,,,,,,,,,,,,0.244505,0.365082,0.346487,0.266861,0.294474,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.346487,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1162005174468132869,"[thousands, evacuated, typhoon, krosa, pounds,...",569433.0,"{3.9924033,1.1286498,2.0563705,3.49563,4.25643...","-0.02 , -0.05 , 0.08 , -0.08 , 0.14 , 0.01 , ...",,0.0,0.0,0.0,98.0,350.0,1.565879e+12,1.162005e+18,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,0.0,0.0,0.0,79.0,83.0,328.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1162005634813849602,"[died, august, flood, incidents, kerala, injur...",769380.0,"{2.0559,0.23130395,-0.2240961,1.780455,2.70426...","-0.04 , -0.03 , 0.06 , -0.04 , 0.10 , 0.04 , ...",,1.0,0.0,0.0,124.0,442.0,1.565879e+12,1.162006e+18,0.0,0.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,72.0,65.0,98.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1162005861075750918,"[view, krosa, russian, wildland, fire, smoke, ...",569596.0,"{2.6357827,4.143295,3.7576637,7.7977004,8.4893...","-0.13 , -0.06 , 0.06 , 0.03 , -0.01 , 0.12 , ...",,5.0,0.0,0.0,240.0,857.0,1.565879e+12,1.162006e+18,0.0,0.0,0.0,1.0,0.0,42.0,0.0,0.0,0.0,0.0,0.0,161.0,1129.0,1049.0,0.0,174.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1162006062867918848,"[powerful, typhoon, krosa, landfall, japan, in...",569645.0,"{3.236566,0.297153,2.6936,4.8573704,2.4796422,...","-0.13 , -0.01 , 0.11 , -0.04 , 0.04 , 0.08 , ...",,1.0,0.0,0.0,117.0,417.0,1.565879e+12,1.162006e+18,0.0,0.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,58.0,3368.0,181.0,0.0,1548.0,0.0,0.0,0.0,0.0,0.0,,,,0.418517,,,,,,,,,,,,0.391959,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


## Load the labelled data

These are generated in 0_Labels.ipynb

In [8]:
# dtypes needs to be specified or long ints will change
labels_df = pd.read_json("../../../data/input/raw/data/2020/2020-A/labels/TRECIS-2018-2020A-labels.json", dtype={} )

# replace the event with a numeric value
labels_df = labels_df.replace({'eventID': event_int_map})

# Count the number of labels
labels_df['num'] = labels_df['postCategories'].str.len()

# Map the priority to a numeric value
labels_df = labels_df.replace({"postPriority": priority_mapping})

# Split categories and map to numeric values
category_list = pd.DataFrame(labels_df["postCategories"].to_list(), columns=['cat1', 'cat2', 'cat3',
                                                                   'cat4', 'cat5', 'cat6',
                                                                   'cat7', 'cat8', 'cat9', 'cat10'])


# Map the categories to numeric values
category_list = category_list.applymap(lambda s: mymap.get(s) if s in mymap else s)


# Join back onto our original list
labels = labels_df.join(category_list)

# Drop the string categories
labels.drop(['postCategories'], axis = 1, inplace = True)

# Tidy
labels = labels.drop(['eventName', 'eventDescription', 'eventType'], axis=1)

# Fill the NaN slots with 0
labels = labels.fillna("0")

# Export
labels.to_csv("../labels.csv", index=False)
labels = clean_dataset_int(labels)
#labels = clean_dataset_new(labels)

labels
df = pd.merge(labels, feature_vector_input, left_on = 'postID', right_on = 'tweet_id', how = 'inner')
df

INFO - NumExpr defaulting to 8 threads.


Unnamed: 0,eventID,postID,postPriority,num,cat1,cat2,cat3,cat4,cat5,cat6,cat7,cat8,cat9,cat10,tweet_text,offset,embeddings,sentiment,features.numb_of_urls,features.numb_of_hashtags,features.numb_of_personal_pronouns,features.numb_of_present_tenses,features.weighted_length,features.permillage,features.tweet_created_at,features.tweet_id_str,features.positive_sentiment,features.negative_sentiment,features.numb_of_mentions,features.numb_of_media,features.numb_of_past_tenses,features.numb_of_weird_chars,features.numb_of_questions,features.numb_of_emoticons,features.numb_of_swearing_word,features.numb_of_slang_words,features.numb_of_intensifiers,features.tweet_length,features.userFollowersCount,features.userFriendsCount,features.user_numb_of_tweets,features.user_list_count,features.dict_precision,features.dict_recall,features.dict_f_measure,features.offset,features.is_verified,tfIdf.bc,tfIdf.tsunami,tfIdf.papua,tfIdf.powerful,tfIdf.quake,tfIdf.guinea,tfIdf.earthquake,tfIdf.passed,tfIdf.threat,tfIdf.pacificocean,tfIdf.papuanewguinea,tfIdf.included,tfIdf.newguinea,tfIdf.staysafe,tfIdf.solomonislands,tfIdf.news,tfIdf.kitv,tfIdf.sports,tfIdf.amp,tfIdf.hawaii,tfIdf.channel,tfIdf.weather,tfIdf.honolulu,tfIdf.morning,tfIdf.britain,tfIdf.imagine,tfIdf.islands,tfIdf.engulfing,tfIdf.prayers,tfIdf.sea,tfIdf.hits,tfIdf.ring,tfIdf.alert,tfIdf.magnitude,tfIdf.issued,tfIdf.isolated,tfIdf.reported,tfIdf.hope,tfIdf.preliminary,tfIdf.lifted,tfIdf.warning,tfIdf.app,tfIdf.nothings,tfIdf.strong,tfIdf.earthbound,tfIdf.cmes,tfIdf.strikes,tfIdf.sun,tfIdf.coast,tfIdf.west,tfIdf.summary,tfIdf.kokopo,tfIdf.sse,tfIdf.gisborne,tfIdf.ne,tfIdf.report,tfIdf.zealand,tfIdf.ene,tfIdf.rocked,tfIdf.waves,tfIdf.epicenter,tfIdf.hazardous,tfIdf.coastal,tfIdf.massive,tfIdf.earthquakepapua,tfIdf.ripple,tfIdf.huge,tfIdf.portmoresby,tfIdf.usgs,tfIdf.radius,tfIdf.waved,tfIdf.abound,tfIdf.negative,tfIdf.paupau,tfIdf.earthquakes,tfIdf.magnet,tfIdf.geological,tfIdf.solomon,tfIdf.hit,tfIdf.survey,tfIdf.region,tfIdf.tuesday,tfIdf.rattled,tfIdf.earthquakemag,tfIdf.local,tfIdf.california,tfIdf.tsunamiwarning,tfIdf.read,tfIdf.thenews,tfIdf.cdt,tfIdf.occurred,tfIdf.tue,tfIdf.event,tfIdf.pacific,tfIdf.triggered,tfIdf.breaking,tfIdf.abc,tfIdf.island,tfIdf.residents,tfIdf.rattles,tfIdf.passes,tfIdf.cbc,tfIdf.repent,tfIdf.chance,tfIdf.israel,tfIdf.awakening,tfIdf.happening,tfIdf.god,tfIdf.country,tfIdf.stuck,tfIdf.nearby,tfIdf.triggering,tfIdf.central,tfIdf.nations,tfIdf.killed,tfIdf.subsiding,tfIdf.struck,tfIdf.hours,tfIdf.utc,tfIdf.erthquakes,tfIdf.volcanoes,tfIdf.ago,tfIdf.info,tfIdf.south,tfIdf.center,tfIdf.raised,tfIdf.late,tfIdf.depth,tfIdf.regionsolomon,tfIdf.location,tfIdf.magnitudemb,tfIdf.measured,tfIdf.papuanuevaguinea,tfIdf.northeast,tfIdf.australia,tfIdf.canberra,tfIdf.kilometers,tfIdf.located,tfIdf.evening,tfIdf.destructive,tfIdf.possibility,tfIdf.damage,tfIdf.images,tfIdf.expected,tfIdf.update,tfIdf.coming,tfIdf.papau,tfIdf.pray,tfIdf.fox,tfIdf.papa,tfIdf.animals,tfIdf.minutes,tfIdf.latestcomments,tfIdf.violent,tfIdf.shared,tfIdf.download,tfIdf.android,tfIdf.express,tfIdf.click,tfIdf.indian,tfIdf.expert,tfIdf.wise,tfIdf.hurricanes,tfIdf.bad,tfIdf.parties,tfIdf.hurricane,tfIdf.shots,tfIdf.prompting,tfIdf.prelim,tfIdf.reuters,tfIdf.prompts,tfIdf.lord,tfIdf.safe,tfIdf.expanded,tfIdf.usa,tfIdf.deep,tfIdf.solomons,tfIdf.tidal,tfIdf.risk,tfIdf.wave,tfIdf.overseas,tfIdf.moment,tfIdf.oceania,tfIdf.cancelled,tfIdf.headlines,tfIdf.developing,tfIdf.youtube,tfIdf.birthday,tfIdf.st,tfIdf.watch,tfIdf.israels,tfIdf.tsunamis,tfIdf.advertiser,tfIdf.star,tfIdf.poses,tfIdf.measuring,tfIdf.miles,tfIdf.earthquakeph,tfIdf.shallow,tfIdf.chain,tfIdf.north,tfIdf.knocking,tfIdf.safety,...,tfIdf.audition,tfIdf.kurup,tfIdf.mahe,tfIdf.floodsheavy,tfIdf.kanhangad,tfIdf.horanadu,tfIdf.breached,tfIdf.muvattupuzha,tfIdf.mavoor,tfIdf.schoolsafety,tfIdf.schoolsout,tfIdf.nedumbassery,tfIdf.chaired,tfIdf.rajani,tfIdf.incessantly,tfIdf.extremeweatherevents,tfIdf.southwestmonsoon,tfIdf.upward,tfIdf.preyed,tfIdf.nallathanni,tfIdf.avalanche,tfIdf.theburningquestion,tfIdf.udumalai,tfIdf.periyavarrai,tfIdf.providing,tfIdf.tada,tfIdf.repeated,tfIdf.mumbaipuneexpressway,tfIdf.refused,tfIdf.expressway,tfIdf.relent,tfIdf.heatmap,tfIdf.ibms,tfIdf.helpkerala,tfIdf.registrationkerala,tfIdf.wrecking,tfIdf.ravaging,tfIdf.toes,tfIdf.aviationdaily,tfIdf.avgeek,tfIdf.moolamattom,tfIdf.kottamala,tfIdf.ashramam,tfIdf.anim,tfIdf.skymet,tfIdf.tap,tfIdf.mobilises,tfIdf.paralyse,tfIdf.areekode,tfIdf.appaling,tfIdf.succumbed,tfIdf.kannadikkal,tfIdf.newss,tfIdf.livemint,tfIdf.oruvela,tfIdf.laam,tfIdf.adangidich,tfIdf.hostel,tfIdf.surviving,tfIdf.idduki,tfIdf.booked,tfIdf.depressed,tfIdf.kuttiady,tfIdf.adjacent,tfIdf.kasargode,tfIdf.helpforkerala,tfIdf.improvement,tfIdf.kochiinternational,tfIdf.waterloggeded,tfIdf.efficiency,tfIdf.signalling,tfIdf.nofakenews,tfIdf.understandably,tfIdf.ugh,tfIdf.limited,tfIdf.suspending,tfIdf.inaccessible,tfIdf.trivendrum,tfIdf.allepay,tfIdf.kovalam,tfIdf.newsflash,tfIdf.newsbulltein,tfIdf.diaster,tfIdf.taluks,tfIdf.mannarkkud,tfIdf.waybad,tfIdf.chennairain,tfIdf.satara,tfIdf.chiplun,tfIdf.edavanna,tfIdf.ideas,tfIdf.innovative,tfIdf.incapable,tfIdf.leverage,tfIdf.revisit,tfIdf.commentary,tfIdf.columns,tfIdf.defence,tfIdf.wayanads,tfIdf.practise,tfIdf.hotels,tfIdf.lakes,tfIdf.swathes,tfIdf.threadbare,tfIdf.roadblocked,tfIdf.msgs,tfIdf.rejoicing,tfIdf.besafebealert,tfIdf.strongertogether,tfIdf.shahada,tfIdf.nandurbar,tfIdf.borders,tfIdf.reliefment,tfIdf.catastrophe,tfIdf.continous,tfIdf.coimbatorerain,tfIdf.gruesome,tfIdf.gargantuan,tfIdf.mallu,tfIdf.allover,tfIdf.jaisal,tfIdf.talked,tfIdf.thaug,tfIdf.believeinrahul,tfIdf.disunite,tfIdf.malayali,tfIdf.trys,tfIdf.gandhis,tfIdf.responsibilities,tfIdf.kannadigas,tfIdf.chennairains,tfIdf.heavier,tfIdf.brigade,tfIdf.deployment,tfIdf.nil,tfIdf.latestnews,tfIdf.hindinews,tfIdf.idukkinews,tfIdf.indiannews,tfIdf.newsupdate,tfIdf.keralanews,tfIdf.lastyear,tfIdf.hillstations,tfIdf.reminding,tfIdf.oneyearofsurvival,tfIdf.hoilday,tfIdf.delugediaries,tfIdf.examines,tfIdf.brick,tfIdf.uprooting,tfIdf.localised,tfIdf.landless,tfIdf.westerlies,tfIdf.fading,tfIdf.commission,tfIdf.collectors,tfIdf.palani,tfIdf.thenmala,tfIdf.achyuths,tfIdf.kodaikkanal,tfIdf.suryanelli,tfIdf.inter,tfIdf.plzzz,tfIdf.augmenting,tfIdf.aara,tfIdf.chai,tfIdf.muse,tfIdf.nee,tfIdf.toast,tfIdf.pattanamthitta,tfIdf.cwc,tfIdf.prevails,tfIdf.examinations,tfIdf.soldiers,tfIdf.jcos,tfIdf.eco,tfIdf.task,tfIdf.valapatanam,tfIdf.mgmnt,tfIdf.kurumanpuzha,tfIdf.kuthirapuzha,tfIdf.centralwatercommission,tfIdf.officially,tfIdf.informs,tfIdf.apron,tfIdf.addresstothenation,tfIdf.overcoming,tfIdf.generouslykeralafloods,tfIdf.vyanadu,tfIdf.helpe,tfIdf.mundakkai,tfIdf.chooralmala,tfIdf.ayyappa,tfIdf.sanghi,tfIdf.bonkers,tfIdf.communism,tfIdf.sorts,tfIdf.keralafloodlive,tfIdf.kkd,tfIdf.overflowed,tfIdf.ncmc,tfIdf.chairs,tfIdf.heavyrainfall,tfIdf.sinha,tfIdf.kattappana,tfIdf.adimali,tfIdf.medicalcamp,tfIdf.rainrelief,tfIdf.drills,tfIdf.sos,tfIdf.verification,tfIdf.silt,tfIdf.flows,tfIdf.ernakulamleave,tfIdf.tommarow,tfIdf.edn,tfIdf.tributary,tfIdf.peruvammuzhy,tfIdf.ndrfu,tfIdf.pond,tfIdf.artificial,tfIdf.kozikhode,tfIdf.experiences,tfIdf.pooyamkutty,tfIdf.breaching,tfIdf.baiju,tfIdf.manikandanchal,tfIdf.laments,tfIdf.exposed,tfIdf.chennaifloods,tfIdf.mangalamdam,tfIdf.cherukunnapuzha,tfIdf.mangalam,tfIdf.attraction,tfIdf.dstrt,tfIdf.keralano,tfIdf.perinthalmanna,tfIdf.trissur,tfIdf.devendrafadnavis,tfIdf.disasterrelief,tfIdf.maharashtracabinet,tfIdf.jokers,tfIdf.firefighters,tfIdf.kalarkutty,tfIdf.kozikode,tfIdf.kallarkutty,tfIdf.temples,tfIdf.courtyard,tfIdf.assess,tfIdf.adversely
0,45.0,1.128285e+18,7.5,2.0,13.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[earthquake, britain, region, tue, utc, minute...",45396.0,"{2.1470141,6.699149,2.3303783,7.55179,4.187389...","0.17 , 0.04 , -0.00 , 0.21 , -0.03 , 0.34 , -...",,0.0,0.0,0.0,219.0,782.0,1.557839e+12,1.128285e+18,0.0,0.0,0.0,0.0,0.0,24.0,0.0,0.0,0.0,0.0,0.0,92.0,4416.0,4860.0,0.0,29.0,0.0,0.0,0.0,0.0,0.0,,,,,,,0.021266,,,,,,,,,,,,0.297505,,,,,,0.181980,,,,,,,,,,,,,,,,,0.336056,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.202923,,,,,,,,,,,0.30065,,,,,,,,,,,,,,,,,,,,,,,,,,,0.244505,0.365082,0.346487,0.266861,0.294474,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.346487,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,45.0,1.128286e+18,2.5,3.0,13.0,7.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[eqalerts, earthquake, britain, region, mins, ...",45439.0,"{2.526981,2.259468,1.6547979,4.1474795,2.77224...","0.15 , -0.02 , 0.07 , 0.01 , 0.02 , 0.20 , 0....",,1.0,0.0,0.0,139.0,496.0,1.557839e+12,1.128286e+18,0.0,0.0,0.0,0.0,0.0,18.0,0.0,0.0,0.0,0.0,0.0,69.0,2420.0,490.0,0.0,11.0,0.0,0.0,0.0,0.0,0.0,,,,,,,0.028355,,,,,,,,,,,,,,,,,,0.242640,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.430018,,,,,,,,,,,,,,,,,,,,,,,,,0.270564,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.326007,,,0.355814,0.392632,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,45.0,1.128286e+18,2.5,2.0,13.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[earthquake, hkt, britain, region, papua, guinea]",45461.0,"{1.760538,0.6848818,-1.090252,5.72876,1.441569...","-0.15 , 0.03 , 0.12 , -0.03 , 0.04 , 0.00 , 0...",,0.0,0.0,0.0,114.0,407.0,1.557839e+12,1.128286e+18,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,49.0,4680.0,4.0,0.0,107.0,0.0,0.0,0.0,0.0,0.0,,,0.032099,,,0.025085,0.042533,,,,,,,,,,,,,,,,,,0.363960,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.405846,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,45.0,1.128286e+18,2.5,4.0,13.0,15.0,7.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,"[breaking, britain, papua, guinea, strong, ear...",45598.0,"{5.3082175,-1.1664782,-1.8848007,8.393738,6.27...","-0.15 , 0.01 , 0.09 , 0.00 , 0.03 , 0.00 , -0...",,5.0,0.0,0.0,155.0,553.0,1.557839e+12,1.128286e+18,0.0,0.0,0.0,1.0,0.0,22.0,0.0,0.0,0.0,0.0,0.0,96.0,33248.0,3665.0,0.0,706.0,0.0,0.0,0.0,0.0,0.0,,,0.017508,,,0.013683,0.023200,,,,,,,,,,,,,,,,,,0.198524,,,,,,,,,,,,,,,,,,,0.246092,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.241927,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,45.0,1.128286e+18,2.5,3.0,13.0,7.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[earthquake, mag, ene, kokopo, papua, guinea, ...",45603.0,"{3.286875,5.14759,-1.3272301,12.569706,7.19217...","-0.08 , 0.09 , 0.15 , -0.15 , 0.14 , -0.06 , ...",,2.0,0.0,0.0,235.0,839.0,1.557839e+12,1.128286e+18,0.0,0.0,0.0,0.0,0.0,34.0,0.0,0.0,0.0,0.0,0.0,125.0,13.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.011329,,,0.008853,0.015011,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.118462,,,,,,0.207864,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2563,47.0,1.161986e+18,5.0,4.0,14.0,25.0,13.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,"[advised, evacuate, storm, bears, japan, news,...",564898.0,"{1.326777,1.6615999,2.65488,3.4790537,3.093019...","0.06 , -0.07 , 0.10 , -0.02 , 0.10 , 0.08 , 0...",,3.0,0.0,0.0,126.0,450.0,1.565874e+12,1.161986e+18,0.0,0.0,0.0,0.0,0.0,16.0,0.0,0.0,0.0,0.0,0.0,70.0,66.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,0.342965,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2564,47.0,1.161989e+18,2.5,3.0,13.0,15.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[globe, view, krosa, himawari, ahi, truecolor,...",565491.0,"{-0.7140419,4.2214427,-1.9052658,9.339899,9.62...","-0.11 , 0.09 , 0.10 , -0.03 , 0.10 , -0.01 , ...",,5.0,0.0,0.0,222.0,792.0,1.565875e+12,1.161989e+18,0.0,0.0,0.0,1.0,0.0,32.0,0.0,0.0,0.0,0.0,0.0,128.0,1129.0,1049.0,0.0,174.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2565,47.0,1.161993e+18,5.0,2.0,25.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[tropical, storm, krosa, landfall, western, ja...",566469.0,"{2.325045,0.911123,0.8409699,3.4537637,0.02907...","-0.07 , 0.12 , 0.13 , -0.10 , 0.01 , 0.04 , 0...",,0.0,0.0,0.0,79.0,282.0,1.565876e+12,1.161993e+18,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,50.0,3929.0,4.0,0.0,75.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2566,47.0,1.161998e+18,2.5,1.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"[worst, nagoya, damage, typhoonkrosa, typhoon]",567731.0,"{1.103184,2.0794048,0.13763995,3.19584,2.01222...","-0.10 , 0.06 , 0.16 , -0.08 , 0.09 , 0.00 , 0...",,2.0,0.0,0.0,100.0,357.0,1.565877e+12,1.161998e+18,0.0,0.0,0.0,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,46.0,729.0,305.0,0.0,35.0,0.0,0.0,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.052998,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [9]:
df = expand_sentiment(df)

KeyboardInterrupt: 

In [None]:
df = expand_embeddings(df)

In [None]:
# Drop the string categories we no longer need
df.drop(['tweet_text', 'embeddings', 'sentiment'], axis = 1, inplace = True)

In [None]:
feature_vector_input.drop(['tweet_text', 'embeddings', 'sentiment'], axis = 1, inplace = True)
feature_vector_input

# Train.csv


We merge the feature vector with the annotated data, this is used to train the classifier.

In [None]:
# Merges the input feature vector with the labels
train = pd.merge(labels, feature_vector_input, left_on = 'postID', right_on = 'tweet_id', how = 'inner')

# reset the index
train = train.reset_index()

# fill NaN and replace Infinity
train.fillna(0, inplace=True)
train.replace([np.inf, -np.inf], np.nan, inplace=True)
train = train.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)

# export to csv
train.to_csv("../train.csv", index=False)

train

# Test.csv


Drops the categories, number of categories and priority so we can make our prediction

In [None]:
# Take a copy of the train dataframe
test = train

# Drop cat*
test.drop(list(test.filter(regex = 'cat\d+$')), axis = 1, inplace = True)

# Drop priority / num (of labels)
test.drop(['postPriority', 'num'], axis = 1, inplace = True)

# export
test.to_csv("../test.csv", index=False)

test

# Igel

This notebook implements `Igel` and supports all sklearn's machine learning functionality.

Caution must be taken to avoid overfitting. See `docs/ml.md` for more information

Igel's supported models:

        +--------------------+----------------------------+-------------------------+
        |      regression    |        classification      |        clustering       |
        +--------------------+----------------------------+-------------------------+
        |   LinearRegression |         LogisticRegression |                  KMeans |
        |              Lasso |                      Ridge |     AffinityPropagation |
        |          LassoLars |               DecisionTree |                   Birch |
        | BayesianRegression |                  ExtraTree | AgglomerativeClustering |
        |    HuberRegression |               RandomForest |    FeatureAgglomeration |
        |              Ridge |                 ExtraTrees |                  DBSCAN |
        |  PoissonRegression |                        SVM |         MiniBatchKMeans |
        |      ARDRegression |                  LinearSVM |    SpectralBiclustering |
        |  TweedieRegression |                      NuSVM |    SpectralCoclustering |
        | TheilSenRegression |            NearestNeighbor |      SpectralClustering |
        |    GammaRegression |              NeuralNetwork |               MeanShift |
        |   RANSACRegression | PassiveAgressiveClassifier |                  OPTICS |
        |       DecisionTree |                 Perceptron |                    ---- |
        |          ExtraTree |               BernoulliRBM |                    ---- |
        |       RandomForest |           BoltzmannMachine |                    ---- |
        |         ExtraTrees |       CalibratedClassifier |                    ---- |
        |                SVM |                   Adaboost |                    ---- |
        |          LinearSVM |                    Bagging |                    ---- |
        |              NuSVM |           GradientBoosting |                    ---- |
        |    NearestNeighbor |        BernoulliNaiveBayes |                    ---- |
        |      NeuralNetwork |      CategoricalNaiveBayes |                    ---- |
        |         ElasticNet |       ComplementNaiveBayes |                    ---- |
        |       BernoulliRBM |         GaussianNaiveBayes |                    ---- |
        |   BoltzmannMachine |      MultinomialNaiveBayes |                    ---- |
        |           Adaboost |                       ---- |                    ---- |
        |            Bagging |                       ---- |                    ---- |
        |   GradientBoosting |                       ---- |                    ---- |
        +--------------------+----------------------------+-------------------------+

In [None]:
# Train the model

params = {
        'cmd': 'fit',    
        'data_path': "../train.csv",
        'yaml_path': 'yaml/multi.yaml'  # DecisionTree
}

Igel(**params)

In [None]:
# Use model to predict on missing values

params = {
        'cmd': 'predict',    
        'data_path': "../test.csv",
        'yaml_path': 'yaml/hyper.yaml'
}
Igel(**params)

# Predictions

1. View the raw predictions
2. Map the labels to their High Level Information Types
3. Merge the predictions back into the training set


In [None]:
def is_neg_predictions(predictions):
    predictions = predictions.sort_values(by=['postPriority'])
    predictions = predictions[(predictions > 0).all(1)]
    predictions.round()

predictions = pd.read_csv("model_results/predictions.csv")
predictions

#is_neg_predictions(predictions)

### Merge the new predictions back onto dataframe with the missing columns

In [None]:
# Map the labels to their High Level Information Types
cat_list = predictions.filter(regex='cat', axis=1).round().applymap(lambda x: highCategoriser[x])

#
predictions = cat_list.combine_first(predictions)

# Merge the predictions back into the training set
df = test.merge(predictions, left_index=True, right_index=True)


# Append the predicted categories to a list in a new column
df['predicted_categories'] = df[['cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9', 'cat10']].values.tolist()


# Get the number of categories into something we can use to index
df['num'] = df['num'].astype(float).astype(int)

# Remove categories beyond what the tweet is predicted to have
df['categories'] = df.apply(lambda x: x['predicted_categories'][0:x['num']], axis=1)

df

## Export

Export in the TRECIS format

In [None]:
# write to .run file
with open("marks2.run" , "w") as out_file:
    for row in df.drop_duplicates(subset="postID").itertuples():
        #print("row:", row)
        content = [
            "TRECIS-CTIT-H-Test-0" + str(int(row.eventID)),
            "Q0",          
            np.int64(row.postID),   
            getattr(row, 'Index'),  #ToDo: Fix?
            #row.priority,
            str(priority_scorer[str(round(row.postPriority))[:3]]),  #ToDo: Fix
            row.categories,
            "marksrun2"
        ]
        out_file.write("\t".join([str(x) for x in content]) + "\n")

In [None]:
#

In [None]:
#df['tweets'] = df['tweets'].str[2:-1]
df


In [None]:
# JSONDecodeError: Expecting ',' delimiter: 
#json_response = json.loads('[' + q.text + '],')

#print(data)

In [None]:
#file_list = ["../output/run2.json"] #"../output/run.json", "../output/run0.json", 
#dfs = [] # an empty list to store the data frames

#for file in file_list:
    with open(file) as f:
        json_data = pd.json_normalize(json.loads(f.read()))
    dfs.append(json_data)
df = pd.concat(dfs, sort=False) # or sort=True depending on your needs
df




In [None]:
#df = pd.read_json("../output/run.json", lines=True)
from pandas.io.json import json_normalize
df = pd.concat([pd.DataFrame(json_normalize(x)) for x in df['tweets']],ignore_index=True)
df