In [None]:
import sys
import os

# Put the parent directory on the import path (presumably where the
# `mysite` project package lives -- confirm against the repo layout).
sys.path.append("..")
# Both variables must be set before django.setup() runs below.
os.environ["DJANGO_SETTINGS_MODULE"] = "mysite.settings"
# Django refuses synchronous ORM calls while an event loop is running (as in
# a Jupyter kernel) unless this variable is set.
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

# Imported only after the environment is configured, then initialised so the
# model imports in the following cell work.
import django
django.setup()

In [None]:
from datetime import timezone
import dateutil

import pandas as pd

from django.contrib.auth.models import User
from fileranker.models import Question, Response, Sequence

In [None]:
# Value passed as `goal` for every Sequence created by import_sequences().
DEFAULT_GOAL = 50

In [None]:
def import_sequences(sequences_csv: str):
    """Create one Sequence, with its Questions, per sequence found in a CSV file.

    The CSV is read with pandas and must contain the columns ``sequence``,
    ``position``, ``project_a``, ``project_b``, ``filename_a``, ``filename_b``,
    ``content_a`` and ``content_b``. Rows are grouped by ``sequence``; each
    group becomes a ``Sequence`` named after the group (created with
    ``goal=DEFAULT_GOAL``) plus one ``Question`` per row.

    A sequence whose name already exists in the database is skipped with a
    warning, so the function can be re-run on the same file without creating
    duplicates.

    Args:
        sequences_csv: Path of the CSV file to import.
    """
    # Function-scope import so this definition does not depend on another
    # cell having imported it.
    from django.db import transaction

    df = pd.read_csv(sequences_csv)

    for seq_name, group_df in df.groupby("sequence"):
        # Skip sequences that were imported before. Without the `continue`
        # the warning was printed and the sequence was created again anyway.
        if Sequence.objects.filter(name=seq_name).exists():
            print(f"Warning: Skipping sequence {seq_name} as it already exists...")
            continue

        group_df = group_df.drop(columns="sequence")
        group_df = group_df.set_index("position")

        # Create sequence with questions
        print(f"Creating sequence '{seq_name}' with {len(group_df)} questions...")
        # All-or-nothing per sequence: a failure halfway through must not leave
        # a partially populated sequence that the existence check above would
        # then skip on every later run.
        with transaction.atomic():
            seq = Sequence.objects.create(name=seq_name, goal=DEFAULT_GOAL)
            for position, row in group_df.iterrows():
                Question.objects.create(
                    sequence=seq,
                    position=position,
                    project_a=row["project_a"],
                    project_b=row["project_b"],
                    filename_a=row["filename_a"],
                    filename_b=row["filename_b"],
                    content_a=row["content_a"],
                    content_b=row["content_b"],
                )

In [None]:
def import_responses(responses_csv: str):
    """Insert the Responses listed in a CSV file.

    The CSV is read with pandas and must contain the columns ``sequence``,
    ``username``, ``position``, ``responded_on`` and ``value``. Rows are
    grouped by (sequence, username); for every row the matching ``Question``
    is looked up by sequence and position and a ``Response`` is created for
    the user.

    Existing responses are only counted and reported, never replaced, so
    importing the same file twice inserts every response twice. The whole
    import runs in a single transaction: if any row fails, nothing from this
    call is kept and the call can simply be repeated after fixing the input.

    Args:
        responses_csv: Path of the CSV file to import.

    Raises:
        Sequence.DoesNotExist, User.DoesNotExist, Question.DoesNotExist:
            If a row references a sequence, user or question position that is
            not in the database.
    """
    # Function-scope imports: a bare `import dateutil` does not reliably expose
    # the `dateutil.parser` submodule, so bind the parser explicitly here.
    from dateutil import parser as date_parser
    from django.db import transaction

    df = pd.read_csv(responses_csv)

    with transaction.atomic():
        for (seq_name, username), group_df in df.groupby(["sequence", "username"]):
            print(f"Inserting user '{username}' responses for sequence '{seq_name}'...")
            seq = Sequence.objects.get(name=seq_name)
            user = User.objects.get(username=username)
            response_count = Response.objects.filter(user=user, question__sequence=seq).count()
            print(f"This sequence-user pair already has {response_count} responses")
            print(f"Inserting {len(group_df)} new responses...")
            print()

            group_df = group_df.drop(columns=["sequence", "username"])
            group_df = group_df.set_index("position")

            for position, row in group_df.iterrows():
                question = Question.objects.get(sequence=seq, position=position)
                dt = date_parser.parse(row["responded_on"])
                if dt.tzinfo is None:
                    # Naive timestamps are taken to be UTC.
                    dt = dt.replace(tzinfo=timezone.utc)
                else:
                    # Convert rather than relabel: replacing tzinfo on an aware
                    # value would silently move it to a different instant.
                    dt = dt.astimezone(timezone.utc)
                Response.objects.create(user=user, question=question, responded_on=dt, value=row["value"])

In [None]:
# Load the sequences and questions defined in my_sequences.csv.
import_sequences("../artifacts/my_sequences.csv")

In [None]:
# Load the sequences and questions defined in testset-largefiles-50p.csv.
import_sequences("../artifacts/testset-largefiles-50p.csv")

In [None]:
# Load the sequences and questions defined in testset-largefiles-75p.csv.
import_sequences("../artifacts/testset-largefiles-75p.csv")

In [None]:
# Load responses last: import_responses() looks up each sequence, question and
# user by name/position, so they must already exist in the database.
import_responses("../artifacts/my_responses.csv")