# Import All Necessary Modules And Setup Project

If you get any errors when importing these, ensure you run the command:
```bash
$ python -m pip install -r requirements.txt
```
to install all necessary modules for this project. This command must be run from inside of this project directory.

It is recommended to use a virtual environment for this project to ensure there are no conflicting package versions on your system.

Activate the virtual environment (if needed), run the pip install command, and then launch Jupyter Lab inside this project to get this project running.

In [None]:
import pandas as pd

## Define Constants

In [None]:
# --- Output location ---------------------------------------------------
# Path the final, cleaned-up table is written to at the end of the notebook
OUTPUT_FILE = 'data/reconstructionFailures.csv'

# --- Column names used in the runnability table ------------------------
STUDENT_COL, ASSN_COL, FILE_COL = 'Student', 'Assignment', 'FileName'
EXECUTION_COL = 'ExecutionState'

# --- Execution-state codes stored in EXECUTION_COL ---------------------
# 0: the file ran successfully
EC_SUCCESS = 0
# 1: indentation or syntax error -- probably a bad reconstruction
EC_IND_OR_STX_ERR = 1
# 2: import error
#    Most likely because the 'modules' directory doesn't exist,
#    possibly due to a reconstruction error
EC_IMPORT_ERR = 2
# 3: some other error occurred; these need manual digging to understand
EC_OTHER_ERR = 3

## Load Data

In [None]:
# Read the raw runnability results from disk. This frame is never modified;
# the notebook works on a copy of it.
reconstruction_runnability_df_uneditied = pd.read_csv(
    'data/reconstructionRunnability.csv',
)

In [None]:
# Work on an independent (deep) copy so the freshly loaded frame stays pristine
reconstruction_runnability_df = reconstruction_runnability_df_uneditied.copy(
    deep=True,
)

In [None]:
# Tally how many files ended in each execution state (displayed by the notebook)
(
    reconstruction_runnability_df
    .loc[:, EXECUTION_COL]
    .value_counts()
)

#### Check All Import Errors

After checking all import errors, it was found that the import errors were *actual* import errors, and not due to bad reconstructions. These can all be converted into `0`, indicating a successful reconstruction!

In [None]:
# The import errors were checked by hand and found to be genuine import errors,
# not reconstruction damage, so they are folded into the success code.
# The named constants replace the bare 2 / 0 so this line stays in sync with the
# execution-state codes defined in the constants cell, and the vectorized
# `replace` avoids a per-element Python lambda.
reconstruction_runnability_df[EXECUTION_COL] = (
    reconstruction_runnability_df[EXECUTION_COL].replace({EC_IMPORT_ERR: EC_SUCCESS})
)

## List All Broken Files

In [None]:
# Rows whose execution state is the indentation/syntax-error code
syntax_errs = reconstruction_runnability_df.loc[
    reconstruction_runnability_df[EXECUTION_COL].eq(EC_IND_OR_STX_ERR)
]
# Rows whose execution state is the catch-all "other error" code
other_errs = reconstruction_runnability_df.loc[
    reconstruction_runnability_df[EXECUTION_COL].eq(EC_OTHER_ERR)
]

In [None]:
# Number of syntax-error files per student (displayed by the notebook)
(
    syntax_errs
    .loc[:, STUDENT_COL]
    .value_counts()
)

In [None]:
# Number of other-error files per student (displayed by the notebook)
(
    other_errs
    .loc[:, STUDENT_COL]
    .value_counts()
)

#### Syntax Errors List

These errors are *probably* due to bad reconstructions, but some may be because of students making mistakes. We need to quickly analyze these, and remove any bad reconstructions from our dataset.

In [None]:
# Print every syntax-error file as a ("student", "assignment", "file"), line so
# the output can be pasted straight into a Python tuple literal.
print("Syntax Errors:")
for _row_label, record in syntax_errs.iterrows():
    quoted_fields = ", ".join(
        f'"{record[column]}"' for column in (STUDENT_COL, ASSN_COL, FILE_COL)
    )
    print(f"({quoted_fields}),")

#### Other Errors List

These errors are *probably* due to other issues in the student's code. We probably want these in our dataset, but should at least verify that they are not broken due to our reconstructions.

In [None]:
# Print every other-error file as a ("student", "assignment", "file"), line so
# the output can be pasted straight into a Python tuple literal.
print("Other Errors:")
for _row_label, record in other_errs.iterrows():
    quoted_fields = ", ".join(
        f'"{record[column]}"' for column in (STUDENT_COL, ASSN_COL, FILE_COL)
    )
    print(f"({quoted_fields}),")

In [None]:
# Manually curated (student, assignment, file name) triples.
# These need to be marked as "okay," regardless of error status:
# the file did not run cleanly, but the reconstruction itself was judged good.
GOOD_RECONSTRUCTIONS_WITH_OTHER_ERRORS = (
    # Other Errors
    ("Student1", "Assign9", "unit5_task1_starter.py"),
    ("Student15", "Assign7", "junk.py"),
    ("Student15", "Assign7", "multiplication table.py"),
    ("Student19", "Assign7", "assn7-task2-starter.py"),
    ("Student2", "Assign6", "task2.py"),
    ("Student20", "Assign11", "Task 1.py"),
    ("Student20", "Assign12", "Task 1.py"),
    ("Student20", "Assign6", "Task 1.py"),
    ("Student20", "Assign6", "Task 2.py"),
    ("Student20", "Assign8", "task1.py"),
    ("Student20", "Assign9", "Task 2.py"),
    ("Student21", "Assign10", "test it out.py"),
    ("Student28", "Assign6", "task1.py"),
    ("Student29", "Assign13", "memory test.py"),
    ("Student29", "Assign13", "messing around.py"),
    ("Student29", "Assign13", "messing round.py"),
    ("Student29", "Assign6", "messing round.py"),
    ("Student29", "Assign8", "messing round 2.py"),
    ("Student29", "Assign8", "messing round.py"),
    ("Student39", "Assign6", "Task2.py"),
    ("Student6", "Assign6", "task1.py"),
    ("Student6", "Assign8", "starter.py"),
)

# Manually curated (student, assignment, file name) triples whose files hit a
# syntax/indentation error but whose reconstruction was nevertheless judged good.
# File names are matched exactly, so entries differing only by letter case
# (e.g. "task1.py" vs "Task1.py") are distinct files.
GOOD_RECONSTRUCTIONS_WITH_SYNTAX_ERRORS = (
    # Syntax Errors
    ("Student12", "Assign10", "wordinator.py"),
    ("Student12", "Assign12", "card.py"),
    ("Student12", "Assign13", "gameboard.py"),
    ("Student12", "Assign13", "memorycard.py"),
    ("Student12", "Assign7", "task2.py"),
    ("Student16", "Assign10", "plan1.py"),
    ("Student16", "Assign7", "chessboard.py"),
    ("Student16", "Assign7", "task2.py"),
    ("Student16", "Assign8", "plan1.py"),
    ("Student16", "Assign9", "task2.py"),
    ("Student19", "Assign7", "task1.py"),
    ("Student19", "Assign8", "task1.py"),
    ("Student2", "Assign11", "main.py"),
    ("Student2", "Assign11", "task1.py"),
    ("Student2", "Assign12", "main.py"),
    ("Student2", "Assign6", "task1.py"),
    ("Student2", "Assign7", "main.py"),
    ("Student2", "Assign8", "main.py"),
    ("Student2", "Assign8", "pattern.py"),
    ("Student2", "Assign8", "task1.py"),
    ("Student2", "Assign9", "main.py"),
    ("Student2", "Assign9", "task1.py"),
    ("Student2", "Assign9", "task2.py"),
    ("Student20", "Assign9", "blobber.py"),
    ("Student22", "Assign12", "task3.py"),
    ("Student22", "Assign6", "task2.py"), # Questionable: a single quotation mark is missing, which may be the student's own mistake. The reconstruction is good enough that the midscore computation won't be affected
    ("Student22", "Assign8", "task1.py"),
    ("Student22", "Assign9", "task2.py"),
    ("Student29", "Assign12", "plan3.py"),
    ("Student29", "Assign13", "plan1.txt.py"),
    ("Student29", "Assign8", "main.py"),
    ("Student3", "Assign6", "task1.py"),
    ("Student3", "Assign9", "blobber.py"),
    ("Student30", "Assign8", "task1.py"),
    ("Student30", "Assign8", "Task1.py"),
    ("Student31", "Assign7", "drawChessboard.py"),
    ("Student32", "Assign12", "task2.py"),
    ("Student32", "Assign9", "blobber.py"),
    ("Student34", "Assign7", "task2.py"),
    ("Student36", "Assign12", "card.py"),
    ("Student36", "Assign12", "deck.py"),
    ("Student36", "Assign12", "task3.py"),
    ("Student36", "Assign9", "blobber.py"),
    ("Student37", "Assign8", "task1.py"),
    ("Student39", "Assign9", "Blobber.py"),
    ("Student39", "Assign9", "Task2.py"),
    ("Student40", "Assign7", "task2.py"),
    ("Student43", "Assign12", "task3.py"),
    ("Student6", "Assign10", "task1.py"),
    ("Student6", "Assign12", "task3.py"),
    ("Student6", "Assign13", "memoryboard.py"),
    ("Student6", "Assign13", "task1.py"),
    ("Student9", "Assign8", "task1.py"),
)

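# These are bad reconstructions. They are listed here for reference only and are
# intentionally commented out, so they keep their error status in the dataset.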
    #("Student12", "Assign7", "chessboard.py"),
    # ("Student12", "Assign9", "task1.py"),
    # ("Student12", "Assign9", "task2.py"),
    # ("Student12", "Assign9", "testDraw.py"),
    # ("Student15", "Assign6", "scratchWork.py"),
    # ("Student15", "Assign6", "task2.py"),
    # ("Student16", "Assign11", "task1.py"),
    # ("Student16", "Assign13", "task2.py"),
    #("Student16", "Assign6", "task1.py"),
    # ("Student16", "Assign9", "blobber.py"),
    # ("Student19", "Assign10", "wordinator.py"),
    # ("Student19", "Assign11", "orbian.py"),
    # ("Student19", "Assign8", "pattern.py"),
    #("Student20", "Assign7", "Task1.py"),
    #("Student21", "Assign13", "memory_test.py"),
    #("Student22", "Assign7", "task2.py"),
    #("Student22", "Assign8", "pattern.py"),
    #("Student24", "Assign13", "task1.py"),
    #("Student24", "Assign7", "task1.py"),
    #("Student26", "Assign11", "Task1.py"),
    #("Student26", "Assign13", "Task1.py"),
    #("Student26", "Assign7", "chessboard.py"),
    #("Student26", "Assign7", "Task2.py"),
    #("Student26", "Assign8", "pattern.py"),
    #("Student26", "Assign8", "Task1.py"),
    #("Student26", "Assign9", "Task1.py"),
    # ("Student27", "Assign12", "task2.py"),
    # ("Student27", "Assign13", "task2.py"),
    # ("Student27", "Assign9", "task1.py"),
    # ("Student29", "Assign12", "task2.py"),
    # ("Student30", "Assign13", "junk.py"),
    # ("Student30", "Assign8", "pattern.py"),
    # ("Student33", "Assign12", "task3.py"),
    # ("Student33", "Assign9", "task1.py"),
    # ("Student33", "Assign9", "task2.py"),
    # ("Student34", "Assign7", "chessboard.py"),
    # ("Student37", "Assign6", "task1.py"),
    # ("Student37", "Assign8", "pattern.py"),
    # ("Student38", "Assign6", "task1.py"),
    # ("Student39", "Assign10", "junk.py"),
    # ("Student39", "Assign10", "Task1.py"),
    # ("Student39", "Assign6", "Task1.py"),
    # ("Student39", "Assign7", "task1.py"),
    # ("Student39", "Assign7", "Task1.py"),
    # ("Student4", "Assign6", "task2.py"),
    # ("Student40", "Assign7", "chessboard.py"),
    # ("Student40", "Assign8", "task1.py"),
    # ("Student40", "Assign9", "task1.py"),
    # ("Student43", "Assign7", "task1.py"),
    # ("Student5", "Assign12", "task3.py"),
    # ("Student6", "Assign10", "wordinator.py"),
    # ("Student6", "Assign13", "memorytest.py"),
    # ("Student7", "Assign13", "memory_test.py"),
    # ("Student7", "Assign6", "task1.py"),
    # ("Student7", "Assign7", "task1.py"),
    # ("Student9", "Assign12", "task3.py"),
    # ("Student9", "Assign13", "task2.py"),
    # ("Student9", "Assign6", "task1.py"),
    # ("Student9", "Assign7", "task2.py"),
    # ("Student9", "Assign8", "pattern.py"),
    # ("Student9", "Assign9", "task1.py"),

In [None]:
# Reset the execution state of every hand-verified good reconstruction to the
# success code. EC_SUCCESS is used instead of a bare 0 so this stays consistent
# with the execution-state constants defined for the notebook.
toFix = (GOOD_RECONSTRUCTIONS_WITH_OTHER_ERRORS, GOOD_RECONSTRUCTIONS_WITH_SYNTAX_ERRORS)

for fix in toFix:
    # Each entry is a (student, assignment, file name) triple
    for student, assignment, file_name in fix:
        # Boolean mask selecting the row(s) for exactly this student/assignment/file
        is_target_file = (
            (reconstruction_runnability_df[STUDENT_COL] == student)
            & (reconstruction_runnability_df[ASSN_COL] == assignment)
            & (reconstruction_runnability_df[FILE_COL] == file_name)
        )
        # Mark the student, assn, file as a good reconstruction.
        # An entry that matches no row is silently a no-op.
        reconstruction_runnability_df.loc[is_target_file, EXECUTION_COL] = EC_SUCCESS

In [None]:
# Give the execution-state column its final name for the exported table
reconstruction_runnability_df = reconstruction_runnability_df.rename(
    columns={EXECUTION_COL: 'ReconstructionFailure'},
)

## Save File

Save the dataframe into a `.csv` file.

In [None]:
# Write the final table to OUTPUT_FILE.
# NOTE: `index` is left at its default, so the DataFrame index is written as
# the first (unnamed) column of the CSV.
reconstruction_runnability_df.to_csv(path_or_buf=OUTPUT_FILE)