## Weather Data Collection


In [14]:
import csv  # for quoting options
import io
import os
from datetime import datetime, timedelta
from getpass import getpass

import pandas as pd
import requests
from google.colab import files

# --- Weather configuration ---
# The Visual Crossing key is read from the VISUAL_CROSSING_API_KEY environment
# variable, falling back to a hidden prompt, so the secret is never stored in
# the notebook source. NOTE(review): the key that used to be hard-coded here
# has been exposed and should be rotated.
API_KEY = os.environ.get("VISUAL_CROSSING_API_KEY") or getpass("Visual Crossing API key: ")
location = "Istanbul,Turkey"
start_date = datetime.strptime("2025-03-14", "%Y-%m-%d")
# "Now" at run time: re-running the cell on a later day fetches a longer range.
end_date = datetime.today()

def fetch_weather_chunk(start, end, timeout=30):
    """Fetch daily weather rows for ``location`` between two dates.

    Args:
        start: First day of the range, as the ``YYYY-MM-DD`` string that is
            placed into the request URL.
        end: Last day of the range, in the same format.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A DataFrame with the columns ``date``, ``weather_condition``,
        ``high_temp``, ``low_temp`` and ``humidity``. An empty DataFrame is
        returned (after printing the error) when the request fails, the server
        answers with a non-200 status, or the response body is empty.

    Raises:
        KeyError: If the CSV response lacks one of the expected columns.
    """
    # Source column in the CSV response -> column name used in the export.
    column_map = {
        "datetime": "date",
        "conditions": "weather_condition",
        "tempmax": "high_temp",
        "tempmin": "low_temp",
        "humidity": "humidity",
    }

    url = f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{location}/{start}/{end}"
    params = {
        "unitGroup": "metric",
        "key": API_KEY,
        "include": "days",
        "contentType": "csv"
    }

    # A timeout keeps a stalled connection from hanging the notebook cell;
    # transport errors are reported the same way as non-200 replies.
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching data from {start} to {end}: {exc}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Error fetching data from {start} to {end}: {response.text}")
        return pd.DataFrame()

    # pd.read_csv raises on a completely empty body, so treat it as "no rows".
    if not response.text.strip():
        print(f"Error fetching data from {start} to {end}: empty response")
        return pd.DataFrame()

    df = pd.read_csv(io.StringIO(response.text))

    # Only the mapped columns are kept; other columns (e.g. the location name)
    # are dropped untouched, so no text re-encoding is needed.
    return df[list(column_map)].rename(columns=column_map)

# Fetch the range one calendar month at a time and collect the non-empty chunks.
weather_frames = []
current = start_date

while current < end_date:
    # Jumping 32 days from the 1st always lands in the following month;
    # snapping back to day 1 gives the first day of that month.
    next_month = (current.replace(day=1) + timedelta(days=32)).replace(day=1)
    # The chunk ends on the last day of the current month, or at end_date if sooner.
    chunk_end = min(end_date, next_month - timedelta(days=1))
    print(f"Fetching: {current.date()} to {chunk_end.date()}")
    chunk_df = fetch_weather_chunk(current.strftime("%Y-%m-%d"), chunk_end.strftime("%Y-%m-%d"))
    # A failed request yields an empty frame; keep it out of the concat so it
    # neither influences the result dtypes nor hides the failure.
    if not chunk_df.empty:
        weather_frames.append(chunk_df)
    current = next_month

# pd.concat raises an opaque ValueError on an empty list; fail with a clear
# message instead of exporting and downloading an empty file.
if not weather_frames:
    raise RuntimeError("No weather data was fetched; nothing to export.")

weather_data = pd.concat(weather_frames, ignore_index=True)

filename = "istanbul_weather_cleaned.csv"
weather_data.to_csv(filename, index=False, quoting=csv.QUOTE_MINIMAL)
files.download(filename)






Fetching: 2025-03-14 to 2025-03-31
Fetching: 2025-04-01 to 2025-04-25


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Apple Health Data Collection

In [None]:

from google.colab import drive
# Mount Google Drive at /content/drive; INPUT_FILE below points into this mount.
drive.mount('/content/drive')

import xml.etree.ElementTree as ET
from datetime import datetime
from collections import defaultdict
import csv
import pandas as pd
import zipfile
from google.colab import files

# --- Configuration ---
# Apple Health export to read, and the inclusive window of days to keep.
INPUT_FILE = "/content/drive/MyDrive/export_new.xml"
START_DATE = datetime(year=2025, month=3, day=17)
END_DATE = datetime(year=2025, month=4, day=22)

# Output CSV file name for each exported metric.
FILES = dict(
    step_count="daily_step_count.csv",
    resting_hr="resting_heart_rate.csv",
    average_hr="average_daily_heart_rate.csv",
    active_energy="active_calories.csv",
    exercise_minutes="exercise_duration.csv",
    sleep_analysis="sleep_duration.csv",
)

# --- Data containers ---
# Running totals per day (missing days start at 0.0).
step_counts = defaultdict(float)
active_energy = defaultdict(float)
exercise_minutes = defaultdict(float)
sleep_durations = defaultdict(float)

# Individual samples per day (missing days start as an empty list).
heart_rates = defaultdict(list)
resting_heart_rates = defaultdict(list)

# --- Helper ---
def parse_date(s):
    """Convert the leading ``YYYY-MM-DD HH:MM:SS`` part of ``s`` to a datetime.

    Only the first 19 characters are read; anything after them (such as a
    trailing UTC offset) is ignored, so the result carries no timezone.
    """
    wall_clock = s[:19]
    return datetime.strptime(wall_clock, "%Y-%m-%d %H:%M:%S")

# --- Parse XML ---
# The inclusive date window never changes while parsing, so compute it once.
range_start = START_DATE.date()
range_end = END_DATE.date()

# Stream the export instead of loading it whole. "end" events are used so that
# every element is fully parsed before it is read and cleared.
for event, elem in ET.iterparse(INPUT_FILE, events=("end",)):
    if elem.tag == "Record":
        r_type = elem.attrib.get("type")
        value = elem.attrib.get("value")
        start = parse_date(elem.attrib.get("startDate"))
        end = parse_date(elem.attrib.get("endDate"))
        # Each record is attributed to the day on which it starts.
        date_key = start.date()

        if range_start <= date_key <= range_end:
            if r_type == "HKQuantityTypeIdentifierStepCount":
                step_counts[date_key] += float(value)

            elif r_type == "HKQuantityTypeIdentifierActiveEnergyBurned":
                active_energy[date_key] += float(value)

            elif r_type == "HKQuantityTypeIdentifierAppleExerciseTime":
                exercise_minutes[date_key] += float(value)

            elif r_type == "HKQuantityTypeIdentifierHeartRate":
                # Kept as individual samples; averaged when the CSV is written.
                heart_rates[date_key].append(float(value))

            elif r_type == "HKQuantityTypeIdentifierRestingHeartRate":
                resting_heart_rates[date_key].append(float(value))

            elif r_type == "HKCategoryTypeIdentifierSleepAnalysis":
                # NOTE(review): every sleep record is summed regardless of its
                # `value` attribute; confirm that in-bed/awake segments and
                # overlapping sources are meant to count toward the total.
                duration_hours = (end - start).total_seconds() / 3600
                sleep_durations[date_key] += duration_hours

    # Clear every element, including records outside the date window, so a
    # large export does not keep already-processed attribute data in memory.
    elem.clear()

# --- Write CSVs ---
def write_csv(filename, data_dict, average=False):
    """Write ``data_dict`` to ``filename`` as a two-column ``Date,Value`` CSV.

    Rows are emitted in ascending key order and values are rounded to two
    decimals.

    Args:
        filename: Path of the CSV file to create or overwrite.
        data_dict: Mapping from a sortable key (the date) to either a number
            or, when ``average`` is true, a sequence of numbers.
        average: When true, each value is a sequence and its mean is written
            (0 for an empty sequence); otherwise the value is written as is.
    """
    def _cell(raw):
        # Collapse a day's samples to their mean when averaging is requested.
        if not average:
            return round(raw, 2)
        mean = sum(raw) / len(raw) if raw else 0
        return round(mean, 2)

    with open(filename, mode='w', newline='') as out:
        sheet = csv.writer(out)
        sheet.writerow(["Date", "Value"])
        # Generator keeps the per-row evaluation order of a plain loop.
        sheet.writerows([day, _cell(data_dict[day])] for day in sorted(data_dict))

# --- Write one CSV per metric ---
# (key in FILES, per-day data, whether the day's samples are averaged)
export_plan = [
    ("step_count", step_counts, False),
    ("active_energy", active_energy, False),
    ("exercise_minutes", exercise_minutes, False),
    ("resting_hr", resting_heart_rates, True),
    ("average_hr", heart_rates, True),
    ("sleep_analysis", sleep_durations, False),
]
for metric_key, daily_values, use_mean in export_plan:
    write_csv(FILES[metric_key], daily_values, average=use_mean)

# --- Preview each file ---
# Read every CSV back from disk and show its first rows under a readable title.
print("Preview of generated data:\n")
for name, filename in FILES.items():
    heading = name.replace('_', ' ').title()
    print(f"--- {heading} ({filename}) ---")
    df = pd.read_csv(filename)
    display(df.head())

# --- Zip & Download ---
# Bundle all generated CSVs into one archive and hand it to the browser.
archive_name = "apple_health_data.zip"
with zipfile.ZipFile(archive_name, "w") as zipf:
    for file in FILES.values():
        zipf.write(file)

files.download(archive_name)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Preview of generated data:

--- Step Count (daily_step_count.csv) ---


Unnamed: 0,Date,Value
0,2025-03-17,18698.0
1,2025-03-18,6194.0
2,2025-03-19,16079.0
3,2025-03-20,17976.0
4,2025-03-21,22038.0


--- Resting Hr (resting_heart_rate.csv) ---


Unnamed: 0,Date,Value
0,2025-03-17,64.0
1,2025-03-18,58.0
2,2025-03-19,60.0
3,2025-03-20,57.0
4,2025-03-21,60.0


--- Average Hr (average_daily_heart_rate.csv) ---


Unnamed: 0,Date,Value
0,2025-03-17,105.03
1,2025-03-18,73.11
2,2025-03-19,82.69
3,2025-03-20,77.59
4,2025-03-21,102.23


--- Active Energy (active_calories.csv) ---


Unnamed: 0,Date,Value
0,2025-03-17,1059.94
1,2025-03-18,380.0
2,2025-03-19,714.69
3,2025-03-20,614.48
4,2025-03-21,1193.71


--- Exercise Minutes (exercise_duration.csv) ---


Unnamed: 0,Date,Value
0,2025-03-17,127.0
1,2025-03-18,15.0
2,2025-03-19,49.0
3,2025-03-20,36.0
4,2025-03-21,137.0


--- Sleep Analysis (sleep_duration.csv) ---


Unnamed: 0,Date,Value
0,2025-03-20,5.3
1,2025-03-21,7.96
2,2025-03-22,5.44
3,2025-03-23,7.1
4,2025-03-24,5.75


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>