In [None]:
import os
import re
import sys

from dotenv import load_dotenv
from google.cloud import bigquery
import pandas as pd

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from src.ingestion.auth import strava_auth
from src.ingestion.extractors.strava_extractor import StravaExtractor
from src.ingestion.loaders.bigquery_loader import BigQueryLoader

# Get Strava data

In [None]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# Presumably the Strava API credentials (verify against src.ingestion.auth);
# each value is None when the variable is not set.
CLIENT_ID = os.environ.get('CLIENT_ID')
CLIENT_SECRET = os.environ.get('CLIENT_SECRET')
REFRESH_TOKEN = os.environ.get('REFRESH_TOKEN')

In [None]:
# Obtain an access token through the project's strava_auth helper.
# NOTE(review): presumably it reads the Strava credentials from the environment - confirm in src.ingestion.auth.
access_token = strava_auth.get_access_token()

# Fetch the activities through the extractor; the structure of the result is
# defined by StravaExtractor, which is not shown in this notebook.
extractor = StravaExtractor(access_token)
all_activities_raw = extractor.fetch_all_activities()

# Flatten the raw records into a DataFrame and preview the first rows.
df_all_activities_raw = pd.json_normalize(all_activities_raw)
df_all_activities_raw.head()

In [None]:
# Data cleaning for BigQuery
# Work on a copy so the raw frame stays untouched and this cell can be re-run.
df_all_activities = df_all_activities_raw.copy()

# BigQuery column names may only contain letters, digits and underscores and
# must start with a letter or an underscore. Replace every other character with
# an underscore and prefix names that would otherwise start with a digit.
# str() keeps non-string labels (e.g. integers) from crashing re.sub.
sanitized_columns = [
    re.sub(r'^(?=[0-9])', '_', re.sub(r'[^a-zA-Z0-9_]', '_', str(col)))
    for col in df_all_activities.columns
]

# Two distinct labels can collapse into the same sanitized name (e.g. 'a.b' and
# 'a_b'); stop here with a clear message instead of handing duplicates on.
assert len(set(sanitized_columns)) == len(sanitized_columns), (
    'Duplicate column names after sanitizing: '
    f'{sorted({c for c in sanitized_columns if sanitized_columns.count(c) > 1})}'
)

df_all_activities.columns = sanitized_columns

df_all_activities.head()

# GCP Exploration

In [None]:
# Create the project's BigQuery loader with its default arguments.
# NOTE(review): target table and credentials are presumably resolved inside BigQueryLoader - confirm.
loader = BigQueryLoader()

In [None]:
# Hand the cleaned activities DataFrame to the loader.
# NOTE(review): destination table and write mode (append vs. replace) are decided
# inside load_data - confirm before re-running this cell.
loader.load_data(data=df_all_activities)

## Read data

In [None]:
# Resolve the fully qualified BigQuery table ID (project.dataset.table) from
# environment variables. Fail fast when one is unset or empty: the f-string
# below would otherwise silently build an ID such as 'None.None.None' that only
# fails later, with a less helpful error, when the table is requested.
missing_env_vars = [
    name
    for name in ('GCP_PROJECT_ID', 'BIGQUERY_DATASET', 'BIGQUERY_TABLE_ACTIVITIES_RAW')
    if not os.environ.get(name)
]
if missing_env_vars:
    raise RuntimeError(
        f'Missing required environment variables: {", ".join(missing_env_vars)}'
    )

GCP_PROJECT_ID = os.environ['GCP_PROJECT_ID']
BIGQUERY_DATASET = os.environ['BIGQUERY_DATASET']
BIGQUERY_TABLE_ACTIVITIES_RAW = os.environ['BIGQUERY_TABLE_ACTIVITIES_RAW']

TABLE_ID = f'{GCP_PROJECT_ID}.{BIGQUERY_DATASET}.{BIGQUERY_TABLE_ACTIVITIES_RAW}'
print(TABLE_ID)

In [None]:
# Path to the service-account key file, relative to the notebook's working directory.
CREDENTIALS_PATH = '../credentials/sa-athlete-dashboard.json'

# Build a BigQuery client authenticated with that key file, using
# GCP_PROJECT_ID as the client's project.
client = bigquery.Client.from_service_account_json(
    CREDENTIALS_PATH, project=GCP_PROJECT_ID
)

In [None]:
# Fetch the table metadata; this raises if the table cannot be found.
table = client.get_table(TABLE_ID)
print(f'Tabelle gefunden: {TABLE_ID}')
print(f'Spalten: {[field.name for field in table.schema]}')
print(f'Zeilen: {table.num_rows}')

# Query the table to verify the loaded data.
# NOTE: SELECT * without a LIMIT reads the whole table into memory; that is
# kept so `df` still holds every row for any later cell.
query = f'SELECT * FROM `{TABLE_ID}`'
print('🔹 Starte Abfrage:', query)

job = client.query(query)
df = job.result().to_dataframe()  # materialise the result as a pandas DataFrame

print('✅ Abfrage abgeschlossen, erste Zeilen:')
# Show only the first rows, as the message above announces, instead of
# rendering the entire frame.
df.head()