### Qatar World Cup 2022
#### Loading raw source CSVs into BigQuery

[_Game Attendance_](https://www.kaggle.com/datasets/parasharmanas/qatar-2022-fifa-world-cup-attendance) <br> (`data/attendance.csv`)

[_Tweets_](https://www.kaggle.com/datasets/konradb/qatar-world-cup-2022-tweets) <br> (`data/tweets.csv`)

In [17]:
import sys
import pandas as pd
import logging
from typing import List
from google.cloud import bigquery 
from google.oauth2 import service_account

### BigQuery identifiers ###
# Project and dataset that the raw tables are written to.
PROJECT_NAME = "dbt-indep-project"
DATASET_NAME = "world_cup_22"

### Credentials and client ###
# Service-account key file used to authenticate against Google Cloud.
KEY_PATH = "/home/alex/.creds/dbt-indep.json"
scopes = ["https://www.googleapis.com/auth/cloud-platform"]
credentials = service_account.Credentials.from_service_account_file(
    KEY_PATH,
    scopes=scopes,
)
# The client's project is taken from the key file, not from PROJECT_NAME.
client = bigquery.Client(
    credentials=credentials,
    project=credentials.project_id,
)

### Dataset ###
# exists_ok=True is passed so an already-existing dataset is tolerated on re-runs.
dataset_id = f"{PROJECT_NAME}.{DATASET_NAME}"
client.create_dataset(dataset_id, exists_ok=True)

### Load Data ###

# Shared load-job configuration, reused for every raw table below.
job_config = bigquery.LoadJobConfig()
job_config.source_format = "CSV"
job_config.autodetect = True
job_config.create_disposition = "CREATE_IF_NEEDED"
job_config.write_disposition = "WRITE_TRUNCATE"
job_config.destination_table_description = "Raw source data loaded to BigQuery"

# Load Data (Tweets, Attendance)
# Maps each destination table name to the raw CSV it is loaded from.
# The attendance table previously re-read tweets.csv, so it was filled
# with tweet rows; it now reads its own source file.
raw_sources = {
    "tweets": "./data/tweets.csv",
    "attendance": "./data/attendance.csv",
}

for table_name, csv_path in raw_sources.items():
    df = pd.read_csv(csv_path)
    job = client.load_table_from_dataframe(
        df,
        f"{PROJECT_NAME}.{DATASET_NAME}.{table_name}",
        job_config=job_config,
    )
    # Block until the load job completes; result() raises if the job failed.
    job.result()