# Project 1 Part 1

## Imports

In [4]:
from composable import pipeable
from composable.strict import map, filter
from composable_glob import glob
from composable_utility import with_open
import composable_records as rec
import composable_tuples as tup

In [6]:
from composable_utility import get, with_open, identity
from composable_object import obj, attr
from composable_origami import fold
import pandas as pd
from functools import reduce

## File Paths

In [9]:
(paths :=
 "./attendance_example_fixed_width/*/*.txt"
 >> glob(recursive=True)
)

['./attendance_example_fixed_width\\dsci494s7\\Attendance Quiz - User Attempts.txt',
 './attendance_example_fixed_width\\dsci494s7\\Practice Quiz - Module 1 - User Attempts.txt',
 './attendance_example_fixed_width\\dsci494s7\\Practice Quiz - Module 2 - User Attempts.txt',
 './attendance_example_fixed_width\\dsci494s7\\Practice Quiz - Module 3 - User Attempts.txt',
 './attendance_example_fixed_width\\dsci494s7\\Practice Quiz - Module 4 - User Attempts.txt',
 './attendance_example_fixed_width\\stat180s18\\Attendance Quiz - User Attempts.txt',
 './attendance_example_fixed_width\\stat491s1\\Attendance Quiz - User Attempts.txt',
 './attendance_example_fixed_width\\stat491s1\\Practice Quiz - Module 1 - User Attempts.txt',
 './attendance_example_fixed_width\\stat491s1\\Practice Quiz - Module 2 - User Attempts.txt',
 './attendance_example_fixed_width\\stat491s1\\Practice Quiz - Module 3 - User Attempts.txt',
 './attendance_example_fixed_width\\stat491s1\\Practice Quiz - Module 4 - User Attempt

## Helper functions 

In [12]:
normalize_path = lambda path: path.replace("\\", "/")
get_course = lambda path: normalize_path(path).split("/")[-2][:-2]
get_section = lambda path: normalize_path(path).split("/")[-2][-2:]

### Test to see if get_course works properly

In [15]:
path = './attendance_example_fixed_width\\dsci494s7\\Attendance Quiz - User Attempts.txt'
print(get_course(path))

dsci494


## Unfolding the file paths into records

### Getting Header

In [19]:
header = [
    ("Org Defined ID", 9),
    ("UserName", 9),
    ("FirstName", 12),
    ("LastName", 12),
    ("Attempt #", 3),
    ("Score", 3),
    ("Out Of", 3),
    ("Attempt_Start", 20),
    ("Attempt_End", 20),
    ("Percent", 4),
]

column_names = [col[0] for col in header]
column_widths = [col[1] for col in header]

In [25]:
paths_and_lines = [
    {
        "course": get_course(path),
        "section": get_section(path),
        "header": column_names,
        "body": lines[1:],   
    }
    for path in paths
    for lines in [with_open(lambda f: f.readlines(), path)]
]


paths_and_lines[:1]  

[{'course': 'dsci494',
  'section': 's7',
  'header': ['Org Defined ID',
   'UserName',
   'FirstName',
   'LastName',
   'Attempt #',
   'Score',
   'Out Of',
   'Attempt_Start',
   'Attempt_End',
   'Percent'],
  'body': ['14460432 au9747cp Jericho     Greer       1  2  2  2019-03-08 15:01:00 2019-03-08 15:11:00 100%14460432 au9747cp Jericho     Greer       1  1  2  2019-03-08 15:00:00 2019-03-08 15:09:00 50%\n',
   '14460432 au9747cp Jericho     Greer       1  1  2  2019-03-08 15:00:00 2019-03-08 15:08:00 50%\n',
   '14460432 au9747cp Jericho     Greer       2  1  2  2019-03-08 15:01:00 2019-03-08 15:09:00 50%\n',
   '14460432 au9747cp Jericho     Greer       1  1  2  2019-03-08 15:01:00 2019-03-08 15:07:00 50%\n',
   '14460432 au9747cp Jericho     Greer       1  1  2  2019-03-08 15:00:00 2019-03-08 15:03:00 50%\n',
   '14460432 au9747cp Jericho     Greer       1  2  2  2019-03-08 15:03:00 2019-03-08 15:13:00 100%14460432 au9747cp Jericho     Greer       1  2  2  2019-03-08 15:01:00

## Combining the bodies

In [29]:
positions = [sum(column_widths[:i]) for i in range(len(column_widths) + 1)]

combined_body = [
    {
        "course": record["course"],
        "section": record["section"],
        **dict(zip(column_names, [line[positions[i]:positions[i+1]].strip() for i in range(len(column_widths))]))
    }
    for record in paths_and_lines
    for line in record["body"]
]
combined_body

[{'course': 'dsci494',
  'section': 's7',
  'Org Defined ID': '14460432',
  'UserName': 'au9747cp',
  'FirstName': 'Jericho',
  'LastName': 'Greer',
  'Attempt #': '1',
  'Score': '2',
  'Out Of': '2',
  'Attempt_Start': '2019-03-08 15:01:00',
  'Attempt_End': '2019-03-08 15:11:00',
  'Percent': '100%'},
 {'course': 'dsci494',
  'section': 's7',
  'Org Defined ID': '14460432',
  'UserName': 'au9747cp',
  'FirstName': 'Jericho',
  'LastName': 'Greer',
  'Attempt #': '1',
  'Score': '1',
  'Out Of': '2',
  'Attempt_Start': '2019-03-08 15:00:00',
  'Attempt_End': '2019-03-08 15:08:00',
  'Percent': '50%'},
 {'course': 'dsci494',
  'section': 's7',
  'Org Defined ID': '14460432',
  'UserName': 'au9747cp',
  'FirstName': 'Jericho',
  'LastName': 'Greer',
  'Attempt #': '2',
  'Score': '1',
  'Out Of': '2',
  'Attempt_Start': '2019-03-08 15:01:00',
  'Attempt_End': '2019-03-08 15:09:00',
  'Percent': '50%'},
 {'course': 'dsci494',
  'section': 's7',
  'Org Defined ID': '14460432',
  'UserNam

In [252]:
df = pd.DataFrame(parsed_records)

output_csv = "combined_attendance.csv"

df.to_csv(output_csv, index=False)