# Import All Necessary Modules And Setup Project

If you get any errors when importing these modules, make sure you have run the following command:
```bash
$ python -m pip install -r requirements.txt
```
to install all necessary modules for this project. This command must be run from inside of this project directory.

It is recommended to use a virtual environment for this project to ensure there are no conflicting package versions on your system.

Activate the virtual environment (if needed), run the pip install command, and then launch Jupyter Lab inside this project to get this project running.

In [None]:
# Uncomment the following line to execute the pip install
# %pip install -r requirements.txt

In [None]:
import pandas as pd
import numpy as np

# Visualization
from matplotlib import pyplot as plt
import seaborn as sns


## Load Datasets

In [None]:
# Load the raw CSVs; both paths are relative to the current working directory.
# NOTE(review): pd.read_csv turns cells such as "NA", "null" or "nan" into NaN by
# default, and the reconstruct functions in this notebook treat a NaN InsertText /
# DeleteText as an empty string. If the keystroke data can contain such literal
# text, consider keep_default_na=False / na_values - TODO confirm against the data.
keystroke_df_unedited = pd.read_csv("data/keystrokes.csv")
student_df_unedited = pd.read_csv("data/students.csv")

#### Copy Datasets For Modification

This preserves the initial datasets, in case we ever need to bring an unedited column/row back into anything

In [None]:
# Working copies: .copy() gives independent frames, so changes made to these
# never reach the *_unedited originals.
keystroke_df = keystroke_df_unedited.copy()
student_df = student_df_unedited.copy()

## Get DF Representing Single Student Submission And File

In [None]:
def getFileInStudentSubmission(df, student, assignment, fileName):
    '''
    Select the keystroke rows for one file of one assignment by one student

    df - a dataframe of keystroke data (needs the SubjectID, AssignmentID
            and CodeStateSection columns)
    student - the SubjectID value to keep
    assignment - the AssignmentID value to keep
    fileName - the CodeStateSection value (the file name) to keep

    Returns a COPY of the matching rows, so changing the result never changes df
    '''
    isStudent = df.SubjectID == student
    isAssignment = df.AssignmentID == assignment
    isFile = df.CodeStateSection == fileName

    # A row is kept only when all three conditions hold
    return df[isStudent & isAssignment & isFile].copy()

In [None]:
def getStudentSubmission(df, student, assignment):
    '''
    Select the keystroke rows for every file of one assignment by one student

    df - a dataframe of keystroke data (needs the SubjectID and
            AssignmentID columns)
    student - the SubjectID value to keep
    assignment - the AssignmentID value to keep

    Returns a COPY of the matching rows, so changing the result never changes df
    '''
    isStudent = df.SubjectID == student
    isAssignment = df.AssignmentID == assignment

    return df[isStudent & isAssignment].copy()

In [None]:
def filterDownToRunAndEdits(df):
    '''
    Keep only the rows whose EventType is 'File.Edit' or 'Run.Program'

    df - a dataframe of keystroke data with an EventType column

    Returns the matching rows in their original order
    '''
    keptEventTypes = ['File.Edit', 'Run.Program']
    return df[df.EventType.isin(keptEventTypes)]

In [None]:
def filterDownToRunAndEditsAndPastes(df):
    '''
    Keep only the rows whose EventType is 'File.Edit', 'Run.Program' or 'X-Paste'

    df - a dataframe of keystroke data with an EventType column

    Returns the matching rows in their original order
    '''
    keptEventTypes = ['File.Edit', 'Run.Program', 'X-Paste']
    return df[df.EventType.isin(keptEventTypes)]

In [None]:
def getStudentSubmissionRunsAndEdits(df, student, assignment):
    '''
    Select one student's rows for one assignment (all files), keeping only
    the run and edit events

    df - a dataframe of keystroke data
    student - the SubjectID value to keep
    assignment - the AssignmentID value to keep
    '''
    submission = getStudentSubmission(df, student, assignment)
    return filterDownToRunAndEdits(submission)

In [None]:
def getFileInStudentSubmissionRunsAndEdits(df, student, assignment, fileName):
    '''
    Select one student's rows for a single file of one assignment, keeping
    only the run and edit events

    df - a dataframe of keystroke data
    student - the SubjectID value to keep
    assignment - the AssignmentID value to keep
    fileName - the CodeStateSection value (the file name) to keep
    '''
    fileEvents = getFileInStudentSubmission(df, student, assignment, fileName)
    return filterDownToRunAndEdits(fileEvents)

## Reconstruct Submissions

In [None]:
def reconstructSingleFileDebugger(df):
    '''
    Replay the File.Edit events of a submission one at a time, printing the
    file contents after every edit. All other event types (including runs)
    are ignored

    This is used primarily for debugging

    df - a dataframe of keystroke data with EventType, SourceLocation,
            InsertText, DeleteText and EventID columns

    Returns: A string representing the final state of the file
    '''

    s = ''
    editRows = df[df.EventType=='File.Edit']

    # enumerate supplies the state number that is printed with each edit
    for fileStateCount, (_, row) in enumerate(editRows.iterrows()):
        i = int(row.SourceLocation)
        if i > len(s):
            # Only a warning: the edit is still applied below
            print("DEBUG: Woah buddy, your cursor is out of bounds")

        # A missing (NaN) insert/delete text means nothing was inserted/deleted
        if pd.isna(row.InsertText):
            insert = ''
        else:
            insert = row.InsertText
        if pd.isna(row.DeleteText):
            delete = ''
        else:
            delete = row.DeleteText

        # Drop len(delete) characters at position i and put insert in their place
        s = s[:i] + insert + s[i+len(delete):]

        print(f"""\
================
StateNo : {fileStateCount}
{i=}
{insert=}
{delete=}
{row.EventID=}
================
{s}
""")

    return s

In [None]:
def reconstructFinalFile(df):
    '''
    Rebuild the final contents of a single file by replaying its edits

    df - A dataframe containing a single student, for a single assignment,
            and a single file of that assignment. Only the File.Edit rows
            are used; they are applied in the order they appear in df

    Returns: A string representing the final state of the file
    '''

    content = ''
    editEvents = df[df.EventType=='File.Edit']

    for _, event in editEvents.iterrows():
        position = int(event.SourceLocation)

        # A missing (NaN) insert/delete text means nothing was inserted/deleted
        inserted = event.InsertText
        if pd.isna(inserted):
            inserted = ''
        removed = event.DeleteText
        if pd.isna(removed):
            removed = ''

        # Everything before the cursor, the new text, then whatever follows
        # the deleted span
        before = content[:position]
        after = content[position + len(removed):]
        content = before + inserted + after

    return content

In [None]:
def reconstructFileAtRunEvents(df):
    '''
    Rebuild a single file by replaying its edits, recording a snapshot of
    the contents at every run event

    df - A dataframe containing a single student, for a single assignment,
            and a single file of that assignment. Must have edit events and
            run events; rows are processed in the order they appear in df

    Returns: A list of strings - one file state per Run.Program event, in
            order, followed by the final state of the file (the final state
            is always appended, even when the last event was a run)
    '''

    snapshots = []
    content = ''

    for _, event in df.iterrows():
        eventType = event.EventType

        if eventType == 'Run.Program':
            snapshots.append(content)
            continue
        if eventType != 'File.Edit':
            # Any other event type leaves the file untouched
            continue

        position = int(event.SourceLocation)

        # A missing (NaN) insert/delete text means nothing was inserted/deleted
        inserted = event.InsertText
        if pd.isna(inserted):
            inserted = ''
        removed = event.DeleteText
        if pd.isna(removed):
            removed = ''

        content = content[:position] + inserted + content[position + len(removed):]

    snapshots.append(content)

    return snapshots

In [None]:
# Run and edit events for one example submission file
student10df = getFileInStudentSubmissionRunsAndEdits(
    df=keystroke_df,
    student='Student10',
    assignment='Assign10',
    fileName='wordinator.py',
)

In [None]:
# Bare expression: as the last line of a notebook cell this shows the dataframe
student10df

In [None]:
# Rebuild the file from student10df and print every recorded state under a banner
student10FileStates = reconstructFileAtRunEvents(student10df)

for i, fileState in enumerate(student10FileStates):
    print("="*40, f"File State : {i}", "="*40, fileState, sep="\n")