In [4]:
import pandas as pd
import numpy as np
import plotly as plt
import seaborn as sns
import os
from datetime import datetime

In [None]:
# Read the CSV file
df = pd.read_csv('your_file.csv', 
                 skiprows=6,              # Skip the first 6 rows
                 nrows=11,                # Read only 11 rows (from 7th to 17th)
                 thousands=',',           # Handle thousands separator
                 encoding='utf-8-sig')    # Handle potential BOM in UTF-8 files

# Display the DataFrame
print(df)

In [8]:
import pandas as pd
import os
from datetime import datetime

def build_ua_pages_dataframe(folder):
    base_path = 'data'
    full_path = os.path.join(base_path, folder)
    all_data = []

    if not os.path.exists(full_path):
        raise ValueError(f"The directory {full_path} does not exist.")

    csv_files = [f for f in os.listdir(full_path) if f.endswith('.csv')]
    
    if not csv_files:
        raise ValueError(f"No CSV files found in {full_path}")

    for filename in csv_files:
        file_path = os.path.join(full_path, filename)
        print(f"Processing file: {file_path}")
        
        # Extract date range from filename
        date_range = filename.split()[-1].replace('.csv', '')
        start_date = datetime.strptime(date_range.split('-')[0], '%Y%m%d')
        
        # Determine year and quarter
        year = start_date.year
        quarter = f"Q{(start_date.month - 1) // 3 + 1}"
        
        try:
            # Read CSV file
            df = pd.read_csv(file_path, 
                             skiprows=6,
                             nrows=11,
                             thousands=',',
                             encoding='utf-8-sig')
            
            if df.empty:
                print(f"Warning: {filename} is empty")
                continue

            # Add year and quarter columns
            df['Year'] = year
            df['Quarter'] = quarter
            
            all_data.append(df)
            print(f"Successfully processed {filename}")
        except Exception as e:
            print(f"Error processing {filename}: {str(e)}")
    
    if not all_data:
        raise ValueError("No data was successfully read from any CSV file")

    # Combine all DataFrames
    combined_df = pd.concat(all_data, ignore_index=True)
    
    print("Columns in the combined DataFrame:")
    print(combined_df.columns.tolist())

    # Clean up the data
    percentage_columns = ['Bounce Rate']
    for col in percentage_columns:
        if col in combined_df.columns:
            combined_df[col] = combined_df[col].str.rstrip('%').astype('float') / 100.0

    if 'Avg. Session Duration' in combined_df.columns:
        combined_df['Avg. Session Duration'] = pd.to_timedelta(combined_df['Avg. Session Duration'])

    numeric_columns = ['Users', 'New Users', 'Sessions', 'Pages / Session']
    for col in numeric_columns:
        if col in combined_df.columns:
            combined_df[col] = pd.to_numeric(combined_df[col], errors='coerce')
    
    return combined_df

# Usage
try:
    df = build_ua_pages_dataframe('flora/flora-pages/Pages/')
    print(df.head())
    print(df.dtypes)
except ValueError as e:
    print(f"Error: {str(e)}")

Processing file: data/flora/flora-pages/Pages/Analytics Flora Bowley Pages 20211001-20211231.csv
Successfully processed Analytics Flora Bowley Pages 20211001-20211231.csv
Processing file: data/flora/flora-pages/Pages/Analytics Flora Bowley Pages 20220101-20220331.csv
Successfully processed Analytics Flora Bowley Pages 20220101-20220331.csv
Processing file: data/flora/flora-pages/Pages/Analytics Flora Bowley Pages 20210101-20210331.csv
Successfully processed Analytics Flora Bowley Pages 20210101-20210331.csv
Processing file: data/flora/flora-pages/Pages/Analytics Flora Bowley Pages 20210401-20210630.csv
Successfully processed Analytics Flora Bowley Pages 20210401-20210630.csv
Processing file: data/flora/flora-pages/Pages/Analytics Flora Bowley Pages 20210701-20210930.csv
Successfully processed Analytics Flora Bowley Pages 20210701-20210930.csv
Processing file: data/flora/flora-pages/Pages/Analytics Flora Bowley Pages 20220401-20220630.csv
Successfully processed Analytics Flora Bowley Pa