In [1]:
# Data cleaning:
import codecs
import csv
import re

import pandas as pd

def decode(input_filepath, output_filepath, input_encoding=None):
    """
    ## decode(input_filepath, output_filepath, input_encoding=None)
    converts a tab-separated file into a UTF-8 encoded comma-separated file

    *input_filepath:
    - path of the tab-separated file to read

    *output_filepath:
    - path of the CSV file to write (overwritten if it already exists)

    *input_encoding:
    - encoding of the input file; None keeps the platform default
    """
    # newline="" leaves line-ending handling to the csv module on both sides.
    with open(input_filepath, "r", encoding=input_encoding, newline="") as input_file, \
            open(output_filepath, "w", encoding="utf-8", newline="") as output_file:
        reader = csv.reader(input_file, delimiter="\t")
        # "\n" keeps the output line endings a plain line-by-line copy would produce.
        writer = csv.writer(output_file, lineterminator="\n")
        # Re-serialising each row (rather than swapping tabs for commas) quotes
        # any field that itself contains a comma, so columns cannot shift.
        writer.writerows(reader)

# Convert the raw tab-separated export into the CSV file that is read below.
decode(input_filepath="pokemon2-8.tsv", output_filepath="pokemon.csv")

def df_import(csv_file):
    """
    ## df_import(csv_file)
    reads a csv file into a pandas dataframe indexed by 'national_number'

    *csv_file:
    - path (or file-like object) of the csv to read
    """
    return pd.read_csv(csv_file, index_col="national_number")

# Load the converted CSV; rows are indexed by national_number.
poke_df = df_import(csv_file="pokemon.csv")

def df_drop_add(dataframe):
    """
    ## df_drop_add(dataframe)
    removes a specific list of columns, adds a 'wins' and 'losses' column
    (only where missing) & renames 'english_name' to 'name'

    *dataframe:
    - takes a pandas dataframe; it is modified in place

    returns:
    - the same dataframe object that was passed in

    raises:
    - KeyError if any of the columns to drop is not present
    """
    # Columns to drop from dataframe
    columns_to_drop = [
        'japanese_name',
        'percent_male',
        'percent_female',
        'capture_rate',
        'base_egg_steps',
        'evochain_0',
        'evochain_1',
        'evochain_2',
        'evochain_3',
        'evochain_4',
        'evochain_5',
        'evochain_6',
        'gigantamax',
        'mega_evolution',
        'mega_evolution_alt',
    ]
    dataframe.drop(columns=columns_to_drop, inplace=True)
    # Columns to add to dataframe. Each one is checked on its own so that an
    # existing 'wins' or 'losses' column is never reset to 0 just because the
    # other one is missing.
    for col in ('wins', 'losses'):
        if col not in dataframe.columns:
            dataframe[col] = 0
    # rename column "english_name" to "name"
    dataframe.rename(columns={"english_name": "name"}, inplace=True)
    return dataframe
# Clean poke_df in place; the function returns the same object, so no rebinding is needed.
df_drop_add(dataframe=poke_df)
# Display the cleaned frame as the cell output.
poke_df
# poke_df.dtypes
# NOTE: poke_df is indexed by national_number, so poke_df.name["Pikachu"] raises
# KeyError: 'Pikachu'. Filter on the column instead: poke_df[poke_df.name == "Pikachu"]

KeyError: 'Pikachu'

In [24]:
# Load the cleaned dataframe into Google BigQuery

from google.cloud import bigquery
import logging
from logging import INFO
import pandas as pd
import pandas_gbq
import sys

# Send INFO-and-above records to stderr, tagged with level, timestamp and call site.
logging.basicConfig(
    format='[%(levelname)-5s][%(asctime)s][%(module)s:%(lineno)04d] : %(message)s',
    level=INFO,
    stream=sys.stderr,
)
# Bind a real Logger instance (not the logging module) so the annotation holds;
# its records propagate to the root handler configured above.
logger: logging.Logger = logging.getLogger(__name__)

# BigQuery column types for the uploaded pokemon table.
_POKEMON_TABLE_SCHEMA = [
    {'name': 'national_number', 'type': 'INT64'},
    {'name': 'gen', 'type': 'STRING'},
    {'name': 'name', 'type': 'STRING'},
    {'name': 'primary_type', 'type': 'STRING'},
    {'name': 'secondary_type', 'type': 'STRING'},
    {'name': 'classification', 'type': 'STRING'},
    {'name': 'height_m', 'type': 'FLOAT64'},
    {'name': 'weight_kg', 'type': 'FLOAT64'},
    {'name': 'hp', 'type': 'INT64'},
    {'name': 'attack', 'type': 'INT64'},
    {'name': 'defense', 'type': 'INT64'},
    {'name': 'sp_attack', 'type': 'INT64'},
    {'name': 'sp_defense', 'type': 'INT64'},
    {'name': 'speed', 'type': 'INT64'},
    {'name': 'abilities_0', 'type': 'STRING'},
    {'name': 'abilities_1', 'type': 'STRING'},
    {'name': 'abilities_2', 'type': 'STRING'},
    {'name': 'abilities_hidden', 'type': 'STRING'},
    {'name': 'against_normal', 'type': 'FLOAT64'},
    {'name': 'against_fire', 'type': 'FLOAT64'},
    {'name': 'against_water', 'type': 'FLOAT64'},
    {'name': 'against_electric', 'type': 'FLOAT64'},
    {'name': 'against_grass', 'type': 'FLOAT64'},
    {'name': 'against_ice', 'type': 'FLOAT64'},
    {'name': 'against_fighting', 'type': 'FLOAT64'},
    {'name': 'against_poison', 'type': 'FLOAT64'},
    {'name': 'against_ground', 'type': 'FLOAT64'},
    {'name': 'against_flying', 'type': 'FLOAT64'},
    {'name': 'against_psychic', 'type': 'FLOAT64'},
    {'name': 'against_bug', 'type': 'FLOAT64'},
    {'name': 'against_rock', 'type': 'FLOAT64'},
    {'name': 'against_ghost', 'type': 'FLOAT64'},
    {'name': 'against_dragon', 'type': 'FLOAT64'},
    {'name': 'against_dark', 'type': 'FLOAT64'},
    {'name': 'against_steel', 'type': 'FLOAT64'},
    {'name': 'against_fairy', 'type': 'FLOAT64'},
    {'name': 'is_sublegendary', 'type': 'INT64'},
    {'name': 'is_legendary', 'type': 'INT64'},
    {'name': 'is_mythical', 'type': 'INT64'},
    {'name': 'description', 'type': 'STRING'},
    {'name': 'wins', 'type': 'INT64'},
    {'name': 'losses', 'type': 'INT64'},
]


def load_to_gbq(dataframe=None):
    """
    ## load_to_gbq(dataframe=None)
    creates the "poke_battler_table" dataset (if it does not exist yet) in the
    client's project & loads a dataframe into its "pokemon" table, replacing
    any existing table

    *dataframe:
    - pandas dataframe to upload; defaults to the notebook-level poke_df
    """
    frame = poke_df if dataframe is None else dataframe

    # NOTE(review): pandas-gbq uploads DataFrame columns only, so an index named
    # after a schema field (here 'national_number') would be silently left out.
    # Promote it to a regular column first; reset_index() returns a copy, so
    # the caller's frame is untouched. Confirm against the installed version.
    schema_names = {field['name'] for field in _POKEMON_TABLE_SCHEMA}
    index_name = frame.index.name
    if index_name in schema_names and index_name not in frame.columns:
        frame = frame.reset_index()

    # Instantiate big query client api which will create a dataset
    client = bigquery.Client()
    # Tell the client to use "poke_battler_table" as the dataset name to create in the project
    dataset_id = "{}.poke_battler_table".format(client.project)
    # Pass dataset_id to bigquery's Dataset class to build a reference
    dataset = bigquery.Dataset(dataset_id)
    # Assign the datasets server location to US
    dataset.location = "US"
    # Create the dataset; exists_ok=True makes this a no-op when it is already there
    dataset = client.create_dataset(dataset, exists_ok=True, timeout=30)
    logger.info("Created dataset {}.{}.".format(client.project, dataset.dataset_id))

    # Load into the same project and dataset that were just created, so the
    # table can never target a project where the dataset does not exist.
    project_id = client.project
    table_id = "{}.pokemon".format(dataset.dataset_id)

    # Loading transformed dataframe into google big query with the specified project/dataset as targets and a specified table schema.
    logger.info("Loading dataframe to {}...".format(dataset.dataset_id))
    pandas_gbq.to_gbq(
        frame,
        table_id,
        project_id=project_id,
        if_exists="replace",
        api_method="load_csv",
        table_schema=_POKEMON_TABLE_SCHEMA,
    )
    logger.info("Successfully loaded poke_df into {}.".format(dataset.dataset_id))

# Create the dataset if it is missing, then (re)load the pokemon table from poke_df.
load_to_gbq()

[INFO ][2022-06-07 11:27:57,802][2873967723:0026] : Created dataset deb-01-346205.poke_battler_table.
[INFO ][2022-06-07 11:27:57,804][2873967723:0036] : Loading dataframe to poke_battler_table...
898 out of 898 rows loaded.,827][gbq:0606] : 
[INFO ][2022-06-07 11:28:04,984][2873967723:0080] : Successfully loaded poke_df into poke_battler_table.
