In [48]:
import numpy as np
import pandas as pd

In [49]:
#read in file only with certain columns that we need
fields = ['pokemonId', 'appearedHour', 'closeToWater', 'city', 'weather', 'weatherIcon', 'urban', 'suburban', 'midurban', 'rural']
df = pd.read_csv('300k.csv', skipinitialspace=True, usecols=fields)

In [50]:
#convert pokemonId to pokemon name - note: farfetch'd = farfetchd, mr. mime = mrmime
pokeNames = ['', 'bulbasaur', 'ivysaur', 'venusaur', 'charmander', 'charmeleon', 'charizard', 'squirtle', 'wartortle', 'blastoise', 'caterpie', 'metapod', 'butterfree', 'weedle', 'kakuna', 'beedrill', 'pidgey', 'pidgeotto', 'pidgeot', 'rattata', 'raticate', 'spearow', 'fearow', 'ekans', 'arbok', 'pikachu', 'raichu', 'sandshrew', 'sandslash', 'nidoran', 'nidorina', 'nidoqueen', 'nidoran', 'nidorino', 'nidoking', 'clefairy', 'vulpix', 'ninetales', 'jigglypuff', 'wigglytuff', 'zubat', 'golbat', 'oddish', 'gloom', 'vileplume', 'paras', 'parasect', 'venonat', 'venemoth', 'diglett', 'dugtrio', 'meowth', 'persian', 'psyduck', 'golduck', 'mankey', 'primeape', 'growlithe', 'arcanine', 'poliwag', 'poliwhirl', 'poliwrath', 'abra', 'kadabra', 'alakazam', 'machop', 'machoke', 'machamp', 'bellsprout', 'weepinbell', 'victreebel', 'tentacool', 'tentacruel', 'geodude', 'graveler', 'golem', 'ponyta', 'rapidash', 'slowpoke', 'slowbro', 'magnemite', 'magnetron', 'farfetchd', 'doduo', 'dodrio', 'seel', 'dewgong', 'grimer', 'muk', 'shellder', 'cloyster', 'gastly', 'haunter', 'gengar', 'onix', 'drowzee', 'hypno', 'krabby', 'kingler', 'voltorb', 'electrode', 'exeggcute', 'exeggutor', 'cubone', 'marowak', 'hitmonlee', 'hitmonchan', 'lickitung', 'koffing', 'weezing', 'rhyhorn', 'rhydon', 'chansey', 'tangela', 'kangaskhan', 'horsea', 'seadra', 'goldeen', 'seaking', 'staryu', 'starmie', 'mrmime', 'scyther', 'jynx', 'electabuzz', 'magmar', 'pinsir', 'tauros', 'magikarp', 'gyarados', 'lapras', 'ditto', 'eevee', 'vaporeon', 'jolteon', 'flareon', 'porygon', 'omanyte', 'omastar', 'kabuto', 'kabutops', 'aerodactyl', 'snorlax', 'articuno', 'zapdos', 'moltres', 'dratini', 'dragonair', 'dragonite', 'mewtwo', 'mew']

def idToName(pokeId): 
    index = int(pokeId)
    return pokeNames[index]

df['pokemonName'] = df['pokemonId'].apply(idToName)

In [51]:
#convert appearedHour to time of day to match scraped data
def timeConvert(hour):
    if(hour >= 4 and hour < 10):
        return 'morning'
    elif(hour >= 10 and hour < 18):
        return 'day'
    else:
        return 'night'

df['appearedTimeOfDay'] = df['appearedHour'].apply(timeConvert)

In [52]:
#dictionary of name: (# of morning appearances, day, night) 
pokeTimes = {}

for i, row in df.iterrows():
    name = row['pokemonName']
    time = row['appearedTimeOfDay']
    if name in pokeTimes:
        m, d, n = pokeTimes[name]
        if(time == 'morning'):
            pokeTimes[name] = (m+1, d, n)
        if(time == 'day'):
            pokeTimes[name] = (m, d+1, n)
        if(time == 'night'):
            pokeTimes[name] = (m, d, n+1)
    else:
        if(time == 'morning'):
            pokeTimes[name] = (1, 0, 0)
        if(time == 'day'):
            pokeTimes[name] = (0, 1, 0)
        if(time == 'night'):
            pokeTimes[name] = (0, 0, 1)   


In [59]:
#dictionary of name: time of day with most appearances
pokeTimeOfDay = {}
for pokemon, (m, d, n) in pokeTimes.items():
    if(m > d and m > n):
        pokeTimeOfDay[pokemon] = 'morning'
    elif(d > m and d > n):
        pokeTimeOfDay[pokemon] = 'day'
    elif(n > m and n > d):
        pokeTimeOfDay[pokemon] = 'night'
    elif(m == d and m == n):
        if(m == n):
            pokeTimeOfDay[pokemon] = 'morning, day, night'
        else: 
            pokeTimeOfDay[pokemon] = 'morning, day'
    elif(m == n):
        pokeTimeOfDay[pokemon] = 'morning, night'
    elif(d == n):
        pokeTimeOfDay[pokemon] = 'day, night'
    
print(pokeTimeOfDay)

{'pidgey': 'night', 'vaporeon': 'night', 'weedle': 'night', 'spearow': 'night', 'machoke': 'night', 'sandshrew': 'night', 'clefairy': 'night', 'rattata': 'night', 'seadra': 'night', 'graveler': 'night', 'nidoran': 'night', 'gyarados': 'night', 'ekans': 'night', 'golem': 'night', 'arcanine': 'night', 'venemoth': 'night', 'parasect': 'night', 'golbat': 'night', 'poliwhirl': 'night', 'starmie': 'night', 'dragonair': 'night', 'kingler': 'night', 'pidgeotto': 'night', 'kangaskhan': 'night', 'beedrill': 'night', 'kadabra': 'night', 'marowak': 'night', 'exeggutor': 'night', 'persian': 'night', 'primeape': 'night', 'rhydon': 'night', 'caterpie': 'night', 'kakuna': 'night', 'pikachu': 'night', 'dugtrio': 'night', 'slowbro': 'night', 'hypno': 'night', 'jynx': 'morning', 'weepinbell': 'night', 'magikarp': 'night', 'tauros': 'night', 'metapod': 'night', 'golduck': 'night', 'rapidash': 'night', 'diglett': 'night', 'oddish': 'night', 'bulbasaur': 'night', 'tentacruel': 'night', 'ninetales': 'night',

In [54]:
#number of appearances in urban vs rural
urbanAppearances = 0
ruralAppearances = 0

for i, row in df.iterrows():
    if(row['urban']):
        urbanAppearances+= 1
    if(row['rural']):
        ruralAppearances+= 1

print(str(urbanAppearances) + str(ruralAppearances))

127131,54136,28979,85775


In [57]:
#number of appearances in urban vs rural per pokemon
#dictionary of name: (urban appearances, rural)
pokePlace = {}
for i, row in df.iterrows():
    name = row['pokemonName']
    if name in pokePlace:
        u, r = pokePlace[name]
        if(row['urban']):
            pokePlace[name] = (u+1, r)
        if(row['rural']):
            pokePlace[name] = (u, r+1)
    else:
        if(row['urban']):
            pokePlace[name] = (1, 0)
        if(row['rural']):
            pokePlace[name] = (0, 1)
            
print(pokePlace)

{'pidgey': (20801, 15626), 'vaporeon': (4289, 3612), 'weedle': (9870, 9507), 'spearow': (5592, 3286), 'machoke': (310, 147), 'sandshrew': (906, 445), 'clefairy': (1693, 966), 'rattata': (15851, 12593), 'seadra': (1146, 581), 'graveler': (1103, 636), 'nidoran': (3500, 2082), 'gyarados': (3978, 2299), 'ekans': (1820, 848), 'golem': (80, 33), 'arcanine': (801, 343), 'venemoth': (3076, 2713), 'parasect': (2909, 2003), 'poliwhirl': (2049, 968), 'starmie': (1881, 849), 'dragonair': (293, 163), 'golbat': (4956, 2472), 'kingler': (2294, 1168), 'pidgeotto': (1340, 944), 'kangaskhan': (176, 62), 'beedrill': (67, 72), 'kadabra': (605, 371), 'exeggutor': (800, 496), 'persian': (762, 463), 'rhydon': (531, 307), 'pikachu': (225, 136), 'slowbro': (936, 459), 'jynx': (190, 63), 'weepinbell': (1410, 1149), 'primeape': (852, 447), 'metapod': (241, 187), 'caterpie': (3824, 3115), 'golduck': (1956, 1121), 'oddish': (512, 234), 'bulbasaur': (683, 353), 'tentacruel': (506, 407), 'ninetales': (249, 194), 'dr

In [71]:
#pokemon that appear more often in urban/rural
urbanPokemon = []
ruralPokemon = []
equalPokemon = []

for pokemon, (u, r) in pokePlace.items():
    if(u > r):
        urbanPokemon.append(pokemon)
    elif(r > u):
        ruralPokemon.append(pokemon)
    else:
        equalPokemon.append(pokemon)

print(str(urbanPokemon) + '\n\n' + str(ruralPokemon) + '\n\n' + str(equalPokemon))

['pidgey', 'vaporeon', 'weedle', 'spearow', 'machoke', 'sandshrew', 'clefairy', 'rattata', 'seadra', 'graveler', 'nidoran', 'gyarados', 'ekans', 'golem', 'arcanine', 'venemoth', 'parasect', 'poliwhirl', 'starmie', 'dragonair', 'golbat', 'kingler', 'pidgeotto', 'kangaskhan', 'kadabra', 'exeggutor', 'persian', 'rhydon', 'pikachu', 'slowbro', 'jynx', 'weepinbell', 'primeape', 'metapod', 'caterpie', 'golduck', 'oddish', 'bulbasaur', 'tentacruel', 'ninetales', 'drowzee', 'magikarp', 'tauros', 'hypno', 'vileplume', 'kakuna', 'lapras', 'gloom', 'kabutops', 'cloyster', 'seel', 'marowak', 'seaking', 'haunter', 'poliwag', 'pidgeot', 'dugtrio', 'rapidash', 'hitmonchan', 'squirtle', 'fearow', 'cubone', 'staryu', 'weezing', 'diglett', 'wigglytuff', 'geodude', 'farfetchd', 'articuno', 'magnemite', 'raticate', 'venonat', 'dodrio', 'dewgong', 'victreebel', 'omastar', 'charmander', 'electabuzz', 'poliwrath', 'omanyte', 'krabby', 'nidorino', 'psyduck', 'wartortle', 'growlithe', 'nidorina', 'muk', 'manke

In [55]:
#total appearances of each pokemon
pokeAppearances = {}
for i, row in df.iterrows():
    name = row['pokemonName']
    if name in pokeAppearances:
        pokeAppearances[name] = pokeAppearances[name] + 1
    else:
        pokeAppearances[name] = 1

In [56]:
#sorted by least to most appearances
pokeAppearancesSorted = sorted(pokeAppearances.items(), key=lambda x: x[1])
print(pokeAppearancesSorted)

[('doduo', 1), ('shellder', 5), ('aerodactyl', 5), ('kabuto', 7), ('machop', 7), ('charizard', 8), ('jigglypuff', 8), ('lapras', 10), ('flareon', 10), ('raichu', 10), ('abra', 12), ('venusaur', 12), ('onix', 12), ('omanyte', 14), ('nidoqueen', 16), ('rhyhorn', 16), ('paras', 16), ('nidoking', 17), ('blastoise', 18), ('porygon', 18), ('tentacool', 19), ('ditto', 20), ('ponyta', 20), ('tangela', 22), ('charmeleon', 23), ('mewtwo', 24), ('meowth', 25), ('zubat', 25), ('jolteon', 26), ('grimer', 28), ('cubone', 29), ('psyduck', 29), ('hitmonlee', 29), ('gastly', 29), ('poliwag', 32), ('exeggcute', 34), ('snorlax', 34), ('horsea', 34), ('machamp', 38), ('farfetchd', 40), ('lickitung', 40), ('slowpoke', 41), ('chansey', 43), ('koffing', 46), ('dragonite', 47), ('hitmonchan', 56), ('sandslash', 56), ('mrmime', 58), ('magnemite', 59), ('growlithe', 60), ('vulpix', 64), ('articuno', 73), ('goldeen', 84), ('butterfree', 95), ('wartortle', 99), ('ivysaur', 100), ('alakazam', 107), ('magmar', 130)