In [1]:
import unicodecsv
from datetime import datetime as dt

In [2]:
# Takes a date as a string, and returns a Python datetime object. 
# If there is no date given, returns None
def parse_date(date):
    if date == '':
        return None
    else:
        return dt.strptime(date, '%Y-%m-%d')
    
# Takes a string which is either an empty string or represents an integer,
# and returns an int or None.
def parse_maybe_int(i):
    if i == '':
        return None
    else:
        return int(i)

# Get number of unique users in the dataset
def get_number_of_users(data):
    users = []
    for row in data:
        if row['account_key'] not in users:
            users.append(row['account_key'])
    
    return len(users)

In [3]:
with open('enrollments.csv', 'rb') as f:
    reader = unicodecsv.DictReader(f)
    enrollments = list(reader)

# Clean up the data types in the enrollments table and format
for enrollment in enrollments:
    enrollment['cancel_date'] = parse_date(enrollment['cancel_date'])
    enrollment['days_to_cancel'] = parse_maybe_int(enrollment['days_to_cancel'])
    enrollment['is_canceled'] = enrollment['is_canceled'] == 'True'
    enrollment['is_udacity'] = enrollment['is_udacity'] == 'True'
    enrollment['join_date'] = parse_date(enrollment['join_date'])

print(enrollments[0])
print(f'\nNumber of rolls: {len(enrollments)}')
print(f'\nNumber of unique users: {get_number_of_users(enrollments)}')

{'account_key': '448', 'status': 'canceled', 'join_date': datetime.datetime(2014, 11, 10, 0, 0), 'cancel_date': datetime.datetime(2015, 1, 14, 0, 0), 'days_to_cancel': 65, 'is_udacity': True, 'is_canceled': True}

Number of rolls: 1640

Number of unique users: 1302


In [4]:
with open('daily_engagement.csv', 'rb') as f:
    reader = unicodecsv.DictReader(f)
    daily_engagement = list(reader)

# Clean up the data types in the engagement table
for engagement_record in daily_engagement:
    engagement_record['account_key'] = engagement_record.pop('acct')
    engagement_record['lessons_completed'] = int(float(engagement_record['lessons_completed']))
    engagement_record['num_courses_visited'] = int(float(engagement_record['num_courses_visited']))
    engagement_record['projects_completed'] = int(float(engagement_record['projects_completed']))
    engagement_record['total_minutes_visited'] = float(engagement_record['total_minutes_visited'])
    engagement_record['utc_date'] = parse_date(engagement_record['utc_date'])    

# Code to save all users
users = []
for row in daily_engagement:
    if row['account_key'] not in users:
        users.append(row['account_key'])

print(daily_engagement[0])
print(f'\nNumber of rolls: {len(daily_engagement)}')
print(f'\nNumber of unique users: {get_number_of_users(daily_engagement)}')

{'utc_date': datetime.datetime(2015, 1, 9, 0, 0), 'num_courses_visited': 1, 'total_minutes_visited': 11.6793745, 'lessons_completed': 0, 'projects_completed': 0, 'account_key': '0'}

Number of rolls: 136240

Number of unique users: 1237


In [5]:
with open('project_submissions.csv', 'rb') as f:
    reader = unicodecsv.DictReader(f)
    project_submissions = list(reader)
    
# Clean up the data types in the submissions table
for submission in project_submissions:
    submission['completion_date'] = parse_date(submission['completion_date'])
    submission['creation_date'] = parse_date(submission['creation_date'])

# Code to save all users
users = []
for row in project_submissions:
    if row['account_key'] not in users:
        users.append(row['account_key'])

print(project_submissions[0])
print(f'\nNumber of rolls: {len(project_submissions)}')
print(f'\nNumber of unique users: {get_number_of_users(project_submissions)}')

{'creation_date': datetime.datetime(2015, 1, 14, 0, 0), 'completion_date': datetime.datetime(2015, 1, 16, 0, 0), 'assigned_rating': 'UNGRADED', 'account_key': '256', 'lesson_key': '3176718735', 'processing_state': 'EVALUATED'}

Number of rolls: 3642

Number of unique users: 743


In [6]:
def get_week_students(data):
    users = []
    for row in data:
        if row['account_key'] not in users:
            users.append(row['account_key'])
    
    return len(users)

get_unique_students(enrollments)

for enrollments in enrollments:
    student = enrollment['account_key']
    if student not in unique_engagement_students and enrollment['join_date'] != enrollment['cancel_date']:
        num_problem_students += 1

num_problem_students

0