## Pie Chart Generation Script

<!-- Prerequisites
Make sure Python and the required libraries (pandas and matplotlib) are installed. -->


In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt


## Features

- Reads CSV files containing workflow step timestamp ranges.
- Converts time values to seconds for easier calculations.
- Calculates the duration of each workflow step.
- Generates pie charts showing the distribution of workflow step durations.
- Saves the generated pie charts as PNG files in the output directory.


In [None]:

# Directory whose '.csv' files are read by the batch loop in this cell.
input_dir = '/Users/nvaishnavi/Documents/Instructional_Video_analysis/6_final_timestamp_ranges'
# Directory the generated 'pie_chart_*.png' images are saved into.
output_dir = '/Users/nvaishnavi/Documents/Instructional_Video_analysis/8_pie_charts_all'


def time_to_seconds(time_str):
    """Convert a colon-separated timestamp to a number of seconds.

    Accepts "HH:MM:SS", "MM:SS" or plain "SS"; the seconds field may carry a
    fractional part (e.g. "00:01:02.5").

    Args:
        time_str: Timestamp string with one to three colon-separated fields.

    Returns:
        The total number of seconds as a float.

    Raises:
        ValueError: If there are more than three fields, or a field is not
            numeric.
    """
    fields = time_str.split(":")
    if len(fields) > 3:
        # Extra fields used to be silently ignored, hiding malformed input.
        raise ValueError(f"Unsupported time format: {time_str!r}")

    *leading, seconds = fields

    # Fold hours/minutes into whole minutes using integer arithmetic so the
    # single float addition at the end matches the HH:MM:SS result exactly.
    whole_minutes = 0
    for field in leading:
        whole_minutes = whole_minutes * 60 + int(field)

    return whole_minutes * 60 + float(seconds)


def sanitize_filename(filename):
    """Return *filename* with every non-alphanumeric character replaced by '_'.

    Underscores already present are kept as they are; dots, spaces, dashes and
    any other punctuation all become underscores.
    """
    cleaned = []
    for ch in filename:
        if ch.isalnum():
            cleaned.append(ch)
        else:
            # Covers '_' itself as well as every character being replaced.
            cleaned.append('_')
    return ''.join(cleaned)


# Batch driver: for every CSV in input_dir, compute per-step durations and
# save a pie chart of their distribution as a PNG in output_dir.

# savefig does not create missing parent directories, so make sure the
# destination exists before the first chart is written.
os.makedirs(output_dir, exist_ok=True)

# os.listdir returns entries in arbitrary order; sort for reproducible runs.
for filename in sorted(os.listdir(input_dir)):
    if not filename.endswith('.csv'):
        continue
    print(f"Processing file: {filename}")

    file_path = os.path.join(input_dir, filename)
    df = pd.read_csv(file_path)

    # Convert both timestamp columns to seconds and derive each step's length.
    df['Start Time (seconds)'] = df['Start Time'].apply(time_to_seconds)
    df['End Time (seconds)'] = df['End Time'].apply(time_to_seconds)
    df['Duration (seconds)'] = df['End Time (seconds)'] - df['Start Time (seconds)']

    # plt.pie raises on negative wedge sizes and zero-length steps contribute
    # nothing, so only rows with a positive duration are plotted. This keeps
    # one bad row from aborting the whole batch.
    plottable = df[df['Duration (seconds)'] > 0]
    skipped_rows = len(df) - len(plottable)
    if skipped_rows:
        print(f"Ignoring {skipped_rows} row(s) with non-positive duration in {filename}")
    if plottable.empty:
        print(f"Skipping {filename}: no rows with a positive duration.")
        continue

    # NOTE: the '.csv' extension is sanitized along with the stem, so outputs
    # are named 'pie_chart_<stem>_csv.png'. Kept as-is so existing output
    # names do not change.
    sanitized_filename = sanitize_filename(filename)
    pie_chart_path = os.path.join(output_dir, f"pie_chart_{sanitized_filename}.png")

    plt.figure(figsize=(8, 6))
    try:
        plt.pie(
            plottable['Duration (seconds)'],
            labels=plottable['Workflow Step'],
            autopct='%1.1f%%',
            startangle=140,
        )
        plt.title(f"Workflow Step Duration Distribution - {filename}")
        plt.axis('equal')  # Equal aspect ratio so the pie is drawn as a circle.
        plt.savefig(pie_chart_path)
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close()

    print(f"Pie chart saved to: {pie_chart_path}")

print("Pie charts generated for all files.")