In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

import requests
from PIL import Image
from io import BytesIO
import os
import csv

from transformers import ViTFeatureExtractor, ViTForImageClassification
import torch

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [2]:
import os
import csv
from PIL import Image
import torch
from transformers import ViTForImageClassification, ViTFeatureExtractor

# Initialize model and feature extractor
model = ViTForImageClassification.from_pretrained('nateraw/vit-age-classifier')
feature_extractor = ViTFeatureExtractor.from_pretrained('nateraw/vit-age-classifier')

# Directory mapping
image_directories = {
    "clean": '/Users/samin/Desktop/Classes/9.60/9.60-Project/datasets/clean_dataset'
}
for i in range(1, 13):
    image_directories[f"filter {i}"] = f'/Users/samin/Desktop/Classes/9.60/9.60-Project/datasets/filter {i}'

# Dictionary to hold overall and per-class accuracies
class_accuracies = {}
overall_accuracies = {}

def evaluate_model(image_dir, csv_file_path, dataset_name):
    predictions = []
    class_counts = {i: {'correct': 0, 'total': 0} for i in range(9)}
    output_csv_path = f'{image_dir}/results_{dataset_name}.csv'

    # Prepare to write results to a CSV file
    with open(output_csv_path, mode='w', newline='') as file:
        csv_writer = csv.writer(file)
        csv_writer.writerow(['image_name', 'class_label', 'accuracy'])
        
        for image_name in os.listdir(image_dir):
            if not image_name.endswith(('.png', '.jpg')):
                continue
            image_path = os.path.join(image_dir, image_name)
            try:
                with Image.open(image_path) as im:
                    inputs = feature_extractor(images=im, return_tensors="pt")
                    outputs = model(**inputs)
                    proba = torch.softmax(outputs.logits, dim=1)
                    pred_class = proba.argmax(1).item()
                    predictions.append((image_name, pred_class))
            except Exception as e:
                print(f"An error occurred with image {image_name}: {e}")

        actual_labels = {}
        with open(csv_file_path, mode='r') as csvfile:
            csvreader = csv.DictReader(csvfile)
            for row in csvreader:
                actual_labels[row['image_name']] = int(row['class_label'])
                class_counts[int(row['class_label'])]['total'] += 1

        correct_predictions = 0
        for image_name, pred_class in predictions:
            is_correct = 1 if image_name in actual_labels and pred_class == actual_labels[image_name] else 0
            correct_predictions += is_correct
            class_counts[actual_labels[image_name]]['correct'] += is_correct if is_correct else 0
            csv_writer.writerow([image_name, pred_class, is_correct])

        overall_accuracy = correct_predictions / len(predictions) if predictions else 0
        overall_accuracies[dataset_name] = overall_accuracy
        class_accuracies[dataset_name] = {class_id: (counts['correct'] / counts['total']) * 100 if counts['total'] > 0 else 0 for class_id, counts in class_counts.items()}

for dataset_name, image_dir in image_directories.items():
    csv_file_path = '/Users/samin/Desktop/Classes/9.60/9.60-Project/datasets/labels 2.csv' if "filter" in dataset_name else '/Users/samin/Desktop/Classes/9.60/9.60-Project/datasets/labels.csv'
    evaluate_model(image_dir, csv_file_path, dataset_name)

# Calculate combined average accuracy for filters 1-12
combined_average_accuracy = sum(overall_accuracies[f"filter {i}"] for i in range(1, 13)) / 12




KeyError: '40shifted51 5.53.53\u202fPM.png.png'

In [54]:
import plotly.graph_objects as go
import pandas as pd
import os

# Assuming class_accuracies and overall_accuracies are already populated
class_ids = list(range(9))  # Classes 0 to 8
filter_names = [f"filter {i}" for i in range(1, 13)] + ["clean"]
filter_names_for_avg = [f"filter {i}" for i in range(1, 13)]

# Prepare data for plotting class accuracies
class_accuracy_data = {class_id: [class_accuracies[filter_name].get(class_id, 0) for filter_name in filter_names] for class_id in class_ids}

# Calculate average accuracy per class across filters 1-12
average_class_accuracies = {class_id: sum(class_accuracy_data[class_id][:12]) / 12 for class_id in class_ids}

def classtoage(classId):
    if classId == 0:
        return '0-2'
    elif classId == 1:
        return '4-9'
    elif classId == 2:
        return '10-19'
    elif classId == 3:
        return '10-29'
    elif classId == 4:
        return '30-39'
    elif classId == 5:
        return '40-49'
    elif classId == 6:
        return '50-59'
    elif classId == 7:
        return '60-69'
    else:
        return '70+'
    
# Create subplots for each class including the average for filters 1-12
fig = go.Figure()
for class_id, accuracies in class_accuracy_data.items():
    accuracies.append(average_class_accuracies[class_id])  # Append the average to the list
    fig.add_trace(go.Line(x=filter_names + ['Average 1-12'], y=accuracies, name= classtoage(class_id)))

fig.update_layout(
    barmode='group',
    title='Accuracy for Each Class Across All Filters',
    xaxis_title='Testing Dataset',
    yaxis_title='Accuracy (%)',
    plot_bgcolor='white'
)

plot_directory = '/Users/samin/Desktop/Classes/9.60/9.60-Project/plots'
if not os.path.exists(plot_directory):
    os.makedirs(plot_directory)
fig.write_html(os.path.join(plot_directory, 'class_accuracies.html'))
fig.show()

# Prepare data for overall accuracies plot including the average for filters 1-12
overall_accuracy_data = [overall_accuracies[filter_name] * 100 for filter_name in filter_names]  # Convert fraction to percentage
combined_average_accuracy = sum(overall_accuracy_data[:12]) / 12  # Recalculate to ensure no logic error
overall_accuracy_data.append(combined_average_accuracy)  # Append combined average


# colors = [
#     'red', 'blue', 'green', 'yellow', 'orange', 'purple', 'cyan', 'magenta', 'grey', 
#     'lightblue', 'darkgreen', 'lightgreen', 'pink', 'teal'
# ]

# Plotting total accuracies including the average for filters 1-12
fig_total = go.Figure()
fig_total.add_trace(go.Bar(
    x=filter_names + ['Average 1-12'], 
    y=overall_accuracy_data, 
    marker_color="lightblue"  # Apply the color list to the marker_color attribute
))

fig_total.update_layout(
    title='Total Accuracy for All Filters',
    xaxis_title='Testing Dataset',
    yaxis_title='Total Accuracy (%)',
    plot_bgcolor='white'
)
fig_total.write_html(os.path.join(plot_directory, 'total_accuracies.html'))
fig_total.show()



plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.




In [49]:
# Saving Distributions of labels

labels_path = '/Users/samin/Desktop/Classes/9.60/9.60-Project/datasets/labels.csv'
labels2_path = '/Users/samin/Desktop/Classes/9.60/9.60-Project/datasets/labels 2.csv'
labels_df = pd.read_csv(labels_path)
labels2_df = pd.read_csv(labels2_path)

# Count the occurrences of each class label and align indexes to ensure all labels are present in both datasets
total_labels = sorted(set(labels_df['class_label']).union(set(labels2_df['class_label'])))
label_counts1 = labels_df['class_label'].value_counts().reindex(total_labels, fill_value=0)
label_counts2 = labels2_df['class_label'].value_counts().reindex(total_labels, fill_value=0)

# Create a DataFrame for plotting
plot_df1 = pd.DataFrame({'Class Label': label_counts1.index, 'Number of Instances': label_counts1.values, 'Dataset': 'labels.csv'})
plot_df2 = pd.DataFrame({'Class Label': label_counts2.index, 'Number of Instances': label_counts2.values, 'Dataset': 'labels2.csv'})
combined_df = pd.concat([plot_df1, plot_df2])

# Plotting with Plotly
fig = go.Figure()
fig.add_trace(go.Bar(
    x=combined_df[combined_df['Dataset'] == 'labels.csv']['Class Label'],
    y=combined_df[combined_df['Dataset'] == 'labels.csv']['Number of Instances'],
    name='labels.csv',
    marker_color='indianred'
))
fig.add_trace(go.Bar(
    x=combined_df[combined_df['Dataset'] == 'labels2.csv']['Class Label'],
    y=combined_df[combined_df['Dataset'] == 'labels2.csv']['Number of Instances'],
    name='labels2.csv',
    marker_color='lightblue'
))

# Update the layout
fig.update_layout(
    title="Combined Distribution of Class Labels in Both Datasets",
    xaxis_title="Class Label",
    yaxis_title="Number of Instances",
    barmode='group',  # This parameter groups bars of different datasets next to each other
    plot_bgcolor='white'
)

# Directory to save the plot
plot_directory = '/Users/samin/Desktop/Classes/9.60/9.60-Project/plots'
if not os.path.exists(plot_directory):
    os.makedirs(plot_directory)

# Save the figure
fig.write_html(os.path.join(plot_directory, 'class_distribution_comparison.html'))
fig.show()
