## Imports and common functions

In [10]:
import seaborn as sb
import pandas as pd
import numpy as np

In [25]:
def load_dat_file(file_name, n_components):
    """
    Reads a '|'-delimited .dat file into a list of rows.

    Every line is split on '|' and each resulting field is stripped of
    surrounding whitespace. Lines that do not split into exactly
    n_components fields are skipped.

    Inputs:
        file_name (string): path of the file to read
        n_components (int): number of '|'-separated fields a line must
            contain to be kept

    Outputs:
        (list): one list of stripped field strings per kept line, in
            file order
    """
    with open(file_name) as dat_file:
        # Lazily split each line; the comprehension below consumes the
        # generator while the file is still open.
        split_rows = (raw_line.split('|') for raw_line in dat_file)
        return [
            [field.strip() for field in fields]
            for fields in split_rows
            if len(fields) == n_components
        ]

## Load in data

In [16]:
# Parse each raw .dat table into a list of rows; n_components is the number
# of '|'-separated fields a line must have to be kept.
raw_checkin_data = load_dat_file(file_name='./raw_data/checkins.dat', n_components=6)
raw_ratings_data = load_dat_file(file_name='./raw_data/ratings.dat', n_components=3)
raw_socialgraph_data = load_dat_file(file_name='./raw_data/socialgraph.dat', n_components=2)
raw_user_data = load_dat_file(file_name='./raw_data/users.dat', n_components=3)
raw_venues_data = load_dat_file(file_name='./raw_data/venues.dat', n_components=3)

In [23]:
# Build one DataFrame per table: the first parsed row supplies the column
# names and the remaining rows supply the records.
checkin_df = pd.DataFrame(
    data=raw_checkin_data[1:],
    columns=raw_checkin_data[0],
)
ratings_df = pd.DataFrame(
    data=raw_ratings_data[1:],
    columns=raw_ratings_data[0],
)
socialgraph_df = pd.DataFrame(
    data=raw_socialgraph_data[1:],
    columns=raw_socialgraph_data[0],
)
user_df = pd.DataFrame(
    data=raw_user_data[1:],
    columns=raw_user_data[0],
)
venues_df = pd.DataFrame(
    data=raw_venues_data[1:],
    columns=raw_venues_data[0],
)

In [24]:
# Write every table out as CSV, without the DataFrame index column.
for frame, csv_path in (
    (checkin_df, './raw_data/checkin.csv'),
    (ratings_df, './raw_data/ratings.csv'),
    (socialgraph_df, './raw_data/socialgraph.csv'),
    (user_df, './raw_data/user.csv'),
    (venues_df, './raw_data/venues.csv'),
):
    frame.to_csv(csv_path, index=False)