In [1]:
import pandas as pd
import json
import csv

from config import PATH_TO_CSV

In [2]:
# Load the raw export; the location comes from the project's config module.
df = pd.read_csv(filepath_or_buffer=PATH_TO_CSV)

In [None]:
# Flag every column whose name ends in an underscore followed by digits
# (e.g. "foo_2"), then keep only the columns that are NOT flagged.
has_numeric_suffix = df.columns.str.contains(r'_[0-9]+$')
df = df.loc[:, ~has_numeric_suffix]

# Show the remaining column labels as the cell output.
df.columns

In [4]:
# Columns removed from the frame before it is exported to "data.csv".
# NOTE(review): the name `drop_colmns` is misspelled; it is kept as-is because
# it is a notebook-level variable that other cells may reference.
drop_colmns = [
    'id',
    'user_id',
    'photo_left',
    'photo_right',
    'photo_full_face',
    'username',
    'chat_id',
    'date_created',
    'question_old',
    'question_allergen',
    'question_medicines',
    'question_skin_type',
]

# Rebind the result instead of using drop(..., inplace=True): `df` was produced
# by a .loc selection in the previous cell, and mutating such a frame in place
# can trigger SettingWithCopyWarning. The resulting frame is the same.
df = df.drop(columns=drop_colmns)

In [5]:
# Persist the trimmed frame without the index column; a later cell reads
# "data.csv" back with the csv module.
df.to_csv(path_or_buf="data.csv", index=False)

In [None]:
# Parse the detection results. The code below iterates the parsed object as a
# mapping whose values are iterables of dict-like items.
with open("acne_detection_results.json") as results_file:
    data = json.load(results_file)

# For every key, add up len(item["class_probs"]) over the items that carry a
# "class_probs" entry; items without that entry contribute nothing.
# NOTE(review): this total is reported as the pimple count below - confirm
# that the length of "class_probs" really is one entry per detected pimple.
counts = {
    image_key: sum(
        len(detection["class_probs"])
        for detection in detections
        if "class_probs" in detection
    )
    for image_key, detections in data.items()
}

# Echo the per-image totals.
for image_name, count in counts.items():
    print(f"Image: {image_name}, Total count of pimples: {count}")

# Save the same totals as a two-column CSV (header row first).
with open("summary_file.csv", "w", newline="") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=["image_name", "total_pimples"])
    writer.writeheader()
    writer.writerows(
        {"image_name": image_name, "total_pimples": count}
        for image_name, count in counts.items()
    )

In [7]:
# Collapse the per-image summary into one total per name prefix: rows whose
# "image_name" shares the same text before the first underscore are summed.
# (The original author describes that prefix as the date part of the name.)
data = {}
with open("summary_file.csv", "r") as summary_csv:
    for record in csv.DictReader(summary_csv):
        prefix = record["image_name"].split("_")[0]
        data[prefix] = data.get(prefix, 0) + int(record["total_pimples"])

# Write the merged totals out, keeping the same two-column layout.
with open("merged_summary_file.csv", "w", newline="") as merged_csv:
    writer = csv.DictWriter(merged_csv, fieldnames=["image_name", "total_pimples"])
    writer.writeheader()
    writer.writerows(
        {"image_name": prefix, "total_pimples": subtotal}
        for prefix, subtotal in data.items()
    )

In [8]:
# Build a lookup from the "image_name" column of the merged summary to its
# "total_pimples" value. Values stay as the strings read from the file; they
# are written straight back out as CSV text below.
# newline="" is what the csv module requires of files handed to a reader.
total_probs_by_date = {}
with open("merged_summary_file.csv", "r", newline="") as merged_csv:
    for record in csv.DictReader(merged_csv):
        total_probs_by_date[record["image_name"]] = record["total_pimples"]

# Copy "data.csv" to "updated_data.csv", appending a "total_pimples" column
# looked up by each row's "date" value (0 when there is no match).
#
# encoding="utf-8": "data.csv" is written by DataFrame.to_csv (UTF-8 by
# default) and "updated_data.csv" is read back by pd.read_csv (also UTF-8 by
# default), so both handles are pinned to UTF-8 instead of the platform
# default, which may differ and mis-decode or fail on non-ASCII text.
# newline="" on the reader keeps line breaks inside quoted fields intact.
with open("data.csv", "r", newline="", encoding="utf-8") as csvfile:
    reader = csv.DictReader(csvfile)
    fieldnames = reader.fieldnames + ["total_pimples"]

    with open("updated_data.csv", "w", newline="", encoding="utf-8") as outfile:
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:
            # NOTE(review): assumes the "date" values use the same format as
            # the keys of the merged summary - confirm, otherwise every row
            # silently falls back to 0.
            row["total_pimples"] = total_probs_by_date.get(row["date"], 0)
            writer.writerow(row)

In [None]:
# Reload the joined file as a DataFrame and preview its first 25 rows.
data = pd.read_csv(filepath_or_buffer="updated_data.csv")
data.head(n=25)

In [None]:
# Work on an explicit copy so the column assignments below modify df1 itself
# rather than a slice of `data` (avoids SettingWithCopyWarning).
df1 = data[["date", "total_pimples", "pimples"]].copy()

# Convert 'pimples' to numeric; anything that cannot be parsed becomes NaN.
df1["pimples"] = pd.to_numeric(df1["pimples"], errors="coerce")

# Fill each missing 'pimples' value with the mean of the rows directly above
# and below it. After the coercion above the unparseable entries are NaN, not
# strings, so they have to be located via NaN (an isinstance(..., str) test
# can never match here). The first and last rows have only one neighbour, and
# a gap next to another gap has a NaN neighbour; those stay NaN.
neighbour_mean = (df1["pimples"].shift(1) + df1["pimples"].shift(-1)) / 2
df1["pimples"] = df1["pimples"].fillna(neighbour_mean)

# Where 'total_pimples' is 0, take the value from 'pimples' instead
# (which may itself still be NaN).
is_zero = df1["total_pimples"] == 0
df1["total_pimples"] = df1["total_pimples"].mask(is_zero, df1["pimples"])

# Mean of the non-zero 'total_pimples' values (NaN entries are skipped by mean()).
average = df1.loc[df1["total_pimples"] != 0, "total_pimples"].mean()

# Replace the remaining NaN values with that mean. The result is assigned back
# instead of using fillna(..., inplace=True) on the selected column, which is
# chained assignment and is not guaranteed to update df1.
df1["total_pimples"] = df1["total_pimples"].fillna(average)

# Truncate to integers, then move every value up one row, so each row of
# `data` carries the count of the row that follows it and the last row is NaN.
# NOTE(review): presumably this turns the column into a "next entry" target -
# confirm the one-row shift is intended.
data["total_pimples"] = df1["total_pimples"].astype(int).shift(-1)
data = data.drop(columns=["pimples"])
data.to_csv('data_with_pimples.csv', index=False)