In [5]:
!pip install requests

Collecting requests
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting charset_normalizer<4,>=2 (from requests)
  Downloading charset_normalizer-3.4.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (36 kB)
Collecting idna<4,>=2.5 (from requests)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting urllib3<3,>=1.21.1 (from requests)
  Using cached urllib3-2.5.0-py3-none-any.whl.metadata (6.5 kB)
Collecting certifi>=2017.4.17 (from requests)
  Downloading certifi-2025.8.3-py3-none-any.whl.metadata (2.4 kB)
Downloading requests-2.32.5-py3-none-any.whl (64 kB)
Downloading charset_normalizer-3.4.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (151 kB)
Using cached idna-3.10-py3-none-any.whl (70 kB)
Using cached urllib3-2.5.0-py3-none-any.whl (129 kB)
Downloading certifi-2025.8.3-py3-none-any.whl (161 kB)
Installing collected packages: urllib3, idna, charset_normalizer, cer

In [7]:
import requests
import pandas as pd
from io import StringIO

In [9]:
# Default dataset location: ads_spend.csv served from raw.githubusercontent.com.
ADS_SPEND_URL = "https://raw.githubusercontent.com/apfurlan/8-figure-data-engineer/refs/heads/main/data/ads_spend.csv"


def read_csv_requests(url=ADS_SPEND_URL, timeout=30) -> "pd.DataFrame | None":
    """Download a CSV over HTTP with ``requests`` and parse it with pandas.

    Args:
        url: Address of the CSV file to fetch. Defaults to ``ADS_SPEND_URL``.
        timeout: Value forwarded to ``requests.get(..., timeout=...)``.
            Without a timeout a stalled connection would block the kernel
            indefinitely.

    Returns:
        The parsed ``DataFrame`` on success, or ``None`` when either the HTTP
        request or the CSV parsing fails. Failures are reported with
        ``print`` rather than raised, so callers must check for ``None``.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx responses into an exception instead of parsing an error page.
        response.raise_for_status()

        # pandas needs a file-like object, so wrap the decoded body in StringIO.
        df = pd.read_csv(StringIO(response.text))
    except requests.exceptions.RequestException as e:
        print(f"HTTP request error: {e}")
        return None
    except Exception as e:
        # Deliberately broad: any failure while parsing is reported and
        # signalled to the caller as None.
        print(f"Error processing CSV: {e}")
        return None
    else:
        print("Successfully loaded data using requests:")
        print(f"Shape: {df.shape}")
        print(f"Columns: {list(df.columns)}")
        return df


print("Reading CSV from GitHub...")

# Fetch the CSV through the requests-based loader; it yields None when the
# download or the parse fails.
df = read_csv_requests()

# Only summarize when loading succeeded.
if df is not None:
    # Banner: blank line, rule, heading, rule.
    print("\n" + "=" * 50, "DATA SUMMARY:", "=" * 50, sep="\n")

    # Overall dimensions of the frame.
    print(f"Total rows: {df.shape[0]}", f"Total columns: {df.shape[1]}", sep="\n")

    # Per-column dtypes, then descriptive statistics of the numeric columns.
    print("\nColumn data types:", df.dtypes, sep="\n")
    print("\nBasic statistics:", df.describe(), sep="\n")

Reading CSV from GitHub...
Successfully loaded data using requests:
Shape: (2000, 10)
Columns: ['date', 'platform', 'account', 'campaign', 'country', 'device', 'spend', 'clicks', 'impressions', 'conversions']

DATA SUMMARY:
Total rows: 2000
Total columns: 10

Column data types:
date            object
platform        object
account         object
campaign        object
country         object
device          object
spend          float64
clicks           int64
impressions      int64
conversions      int64
dtype: object

Basic statistics:
             spend       clicks   impressions  conversions
count  2000.000000  2000.000000   2000.000000  2000.000000
mean    845.382160   465.776000  25689.913500    27.458500
std     376.355343   248.882744  15993.415778    19.221525
min     200.250000    63.000000   2070.000000     1.000000
25%     510.640000   259.750000  13368.500000    13.000000
50%     837.185000   432.000000  21765.000000    22.000000
75%    1162.720000   624.250000  34855.750000