In [1]:
import os
# Show the current working directory to confirm where the notebook is running.
os.getcwd()

'D:\\Documents\\PycharmProjects\\LEADING-RAMP'

Script from Jon (modify the file paths so they point to your local copy of the datasets)

In [2]:
import pandas as pd
from zipfile import ZipFile
import glob

In [3]:
def extract_subset_ramp_data(zip_file, ir_repo_id):
    """Read one zipped monthly RAMP file and keep a single repository's rows.

    Only one monthly archive is loaded at a time, and it is filtered down to
    the requested repository before being handed back, so callers never have
    to hold more than one full month in memory.

    Parameters
    ----------

    zip_file:
        String. A file path pointing to a zip file. The first member listed
        in the archive is read as CSV.

    ir_repo_id:
        String. A locally unique repository identifier; rows are kept when
        their "repository_id" column equals this value.

    Returns
    -------

    ir_data:
        A Pandas dataframe. An independent copy of the matching rows, with
        the row labels they had in the full monthly file.

    """
    # The second context manager may reference the first, so both handles are
    # opened (and later closed) by a single `with` statement.
    with ZipFile(zip_file) as archive, \
            archive.open(archive.namelist()[0]) as csv_member:
        month_df = pd.read_csv(csv_member)

    belongs_to_repo = month_df["repository_id"] == ir_repo_id
    return month_df[belongs_to_repo].copy()


In [4]:
def get_ir_data(ir_repo_id, cols, file_list):
    """Aggregate one repository's rows from a list of zipped RAMP files.

    Each monthly file is subset to the requested repository, the monthly
    subsets are collected in a list, and they are concatenated once at the
    end. (Growing a dataframe with ``DataFrame.append`` inside the loop copies
    all accumulated rows on every iteration, and that method no longer exists
    in pandas 2.0 and later.)

    Parameters
    ----------

    ir_repo_id:
        String. A locally unique repository identifier, designating the
        repository whose data will be aggregated.

    cols:
        List. Column names the result is guaranteed to contain. They come
        first, in this order; any additional columns found in the monthly
        data follow in order of first appearance. A column from ``cols``
        that is absent from the data is filled with missing values.

    file_list:
        List. File paths to monthly RAMP data in zipped format.

    Returns
    -------

    ir_data:
        A Pandas dataframe. The aggregated RAMP data for the specified IR
        across all months included in the file_list. Row labels from the
        monthly files are kept as-is (they are not renumbered). If
        file_list is empty, an empty dataframe with ``cols`` as its columns
        is returned.

    """
    monthly_frames = []
    for mo_data in file_list:
        # Progress output: one line per monthly file as it is processed.
        print(mo_data)
        monthly_frames.append(extract_subset_ramp_data(mo_data, ir_repo_id))

    if not monthly_frames:
        return pd.DataFrame(columns=cols)

    ir_data = pd.concat(monthly_frames)
    # Put the requested columns first and create any that the data lacks,
    # matching the layout produced by seeding with DataFrame(columns=cols).
    requested = list(cols)
    extras = [col for col in ir_data.columns if col not in requested]
    return ir_data.reindex(columns=requested + extras)

In [5]:
def get_v1_data(ir_repo_id, data_dir=None):
    """This function aggregates all RAMP "v1" data for a single IR, i.e. data
       in the format harvested between January 1, 2017 and August 18, 2018.
       Column names for those data are hard coded. A list of data files is
       generated by matching "*all.zip" inside data_dir.

       NOTE(review): the default data_dir points at a "Ramp2020" folder, which
       may not contain any "*all.zip" files; in that case the result is an
       empty dataframe holding only the v1 column names.

       TODO: It may be useful to remove this function and subset the v1
       data for each month as part of the v2 aggregations. Leaving it for
       now in case we want the option to store/save v1 data by itself.

    Parameters
    ----------

    ir_repo_id:
        String. A locally unique repository identifier, designating the
        repository whose data will be aggregated.

    data_dir:
        String or path-like, optional. Directory holding the zipped monthly
        RAMP files. Defaults to the original hard-coded location so existing
        one-argument calls behave as before.

    Returns
    -------

    ir_v1_data:
        A Pandas dataframe. The aggregated RAMP "v1" data for the specified IR
        across all months included in the all_data_file_list.
    """
    if data_dir is None:
        # Original hard-coded location, kept as the default for existing callers.
        data_dir = "D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020"

    all_cols = ['citableContent', 'clickThrough', 'clicks', 'country', 'date',
                'device', 'impressions', 'index', 'position', 'url',
                'repository_id']

    # glob returns matches in arbitrary order; sorting makes the processing
    # order (and therefore the row order of the result) deterministic.
    all_data_file_list = sorted(glob.glob(str(data_dir) + "/*all.zip"))
    ir_v1_data = get_ir_data(ir_repo_id, all_cols, all_data_file_list)
    return ir_v1_data


In [6]:
def get_v2_pc_data(ir_repo_id, data_dir=None):
    """This function aggregates all RAMP page click data harvested for a single
       IR since August 19, 2018 ("v2" data). Column names for those data are
       hard coded. A list of data files is generated by matching
       "*all_page-clicks.zip" inside data_dir.

    Parameters
    ----------

    ir_repo_id:
        String. A locally unique repository identifier, designating the
        repository whose data will be aggregated.

    data_dir:
        String or path-like, optional. Directory holding the zipped monthly
        RAMP files. Defaults to the original hard-coded location so existing
        one-argument calls behave as before.

    Returns
    -------

    ir_v2_pc_data:
        A Pandas dataframe. The aggregated RAMP page clicks data for the
        specified IR across all months included in the pageclick_data_file_list.
    """
    if data_dir is None:
        # Original hard-coded location, kept as the default for existing callers.
        data_dir = "D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020"

    pageclick_cols = ['citableContent', 'clickThrough', 'clicks', 'date',
                      'impressions', 'index', 'position', 'url',
                      'repository_id']

    # glob returns matches in arbitrary order; sorting makes the processing
    # order (and therefore the row order of the result) deterministic.
    pageclick_data_file_list = sorted(
        glob.glob(str(data_dir) + "/*all_page-clicks.zip"))
    ir_v2_pc_data = get_ir_data(ir_repo_id, pageclick_cols,
                                pageclick_data_file_list)
    return ir_v2_pc_data

In [7]:
def concat_ramp_versions(v1_data, v2_data):
    """Stack v1 rows on top of v2 rows using only the v2 column set.

    Parameters
    ----------

    v1_data:
        A Pandas dataframe. Must contain every column present in v2_data;
        any additional columns are dropped from the result.

    v2_data:
        A Pandas dataframe. Its columns (and their order) define the layout
        of the result.

    Returns
    -------

    v1_v2_concatenated:
        A Pandas dataframe. The v1 rows followed by the v2 rows, with a
        freshly numbered row index. Neither input is modified.
    """
    shared_columns = v2_data.columns.tolist()
    # Work on a copy so the caller's v1 frame is left untouched.
    v1_trimmed = v1_data.loc[:, shared_columns].copy()
    return pd.concat([v1_trimmed, v2_data], ignore_index=True)

In [8]:
def process_repo(ir_repo_id):
    """Workflow entry point: build the complete page-click frame for one IR.

    Gathers the repository's v1 data, then its v2 page-click data, and
    concatenates the two on the v2 page-click columns.

    Parameters
    ----------

    ir_repo_id:
       String. A locally unique identifier for the repository whose data will
       be aggregated.

    Returns
    -------

    ir_complete_pc_data:
       A Pandas dataframe. The aggregated page click data across all the
       years/months specified for the specified repository.

    Notes
    -----

    Only the page-click frame is returned. The country/device ("ai")
    aggregation is currently disabled; it would need a get_v2_ai_data
    function, which is not defined in the visible part of this notebook.

    """
    v1_frame = get_v1_data(ir_repo_id)
    v2_pc_frame = get_v2_pc_data(ir_repo_id)
    return concat_ramp_versions(v1_frame, v2_pc_frame)

In [9]:
# Aggregate the page-click data for repository id "montana_state_university".
msu_pc_2020 = process_repo("montana_state_university")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [10]:
# Display the aggregated Montana State University frame as the cell output.
msu_pc_2020 

Unnamed: 0,citableContent,clickThrough,clicks,date,impressions,index,position,url,repository_id
0,Yes,0.00,0.0,2020-01-04,1.0,montana_page_clicks,161.00,http://scholarworks.montana.edu/xmlui/bitstrea...,montana_state_university
1,Yes,0.00,0.0,2020-01-04,1.0,montana_page_clicks,139.00,http://scholarworks.montana.edu/xmlui/bitstrea...,montana_state_university
2,Yes,0.00,0.0,2020-01-04,1.0,montana_page_clicks,298.00,http://scholarworks.montana.edu/xmlui/bitstrea...,montana_state_university
3,Yes,0.04,1.0,2020-01-04,25.0,montana_page_clicks,49.56,http://scholarworks.montana.edu/xmlui/bitstrea...,montana_state_university
4,Yes,0.50,1.0,2020-01-04,2.0,montana_page_clicks,4.00,http://scholarworks.montana.edu/xmlui/bitstrea...,montana_state_university
...,...,...,...,...,...,...,...,...,...
1772461,No,0.00,0.0,2020-12-05,1.0,msu_scholarworks_page_clicks,12.00,https://scholarworks.montana.edu/xmlui/recent-...,montana_state_university
1772462,No,0.00,0.0,2020-12-05,1.0,msu_scholarworks_page_clicks,11.00,https://scholarworks.montana.edu/xmlui/recent-...,montana_state_university
1772463,No,0.00,0.0,2020-12-05,5.0,msu_scholarworks_page_clicks,23.20,https://scholarworks.montana.edu/xmlui/recent-...,montana_state_university
1772464,No,0.00,0.0,2020-12-05,1.0,msu_scholarworks_page_clicks,10.00,https://scholarworks.montana.edu/xmlui/recent-...,montana_state_university


In [11]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
msu_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/msu_pc_2020.csv", header = True)

In [12]:
# Aggregate the page-click data for repository id "boston_university".
boston_pc_2020 = process_repo("boston_university")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [13]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
boston_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/boston_pc_2020.csv", header = True)

In [14]:
# Aggregate the page-click data for the two Caltech repository ids
# ("california_tech_authors" and "california_tech_thesis"), one frame each.
caltech_a_pc_2020 = process_repo("california_tech_authors")
caltech_t_pc_2020 = process_repo("california_tech_thesis")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [15]:
# Export both Caltech frames to separate CSV files in the Export folder, writing the column header row.
caltech_a_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/caltech_a_pc_2020.csv", header = True)
caltech_t_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/caltech_t_pc_2020.csv", header = True)

In [16]:
# Aggregate the page-click data for repository id "university_pittsburgh".
pittsburgh_pc_2020 = process_repo("university_pittsburgh")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [17]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
pittsburgh_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/pittsburgh_pc_2020.csv", header = True)

In [18]:
# Aggregate the page-click data for repository id "virginia_tech".
vatech_pc_2020 = process_repo("virginia_tech")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [19]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
vatech_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/vatech_pc_2020.csv", header = True)

In [20]:
# Aggregate the page-click data for repository id "university_texas".
ut_pc_2020 = process_repo("university_texas")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [21]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
ut_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/ut_pc_2020.csv", header = True)

In [22]:
# Aggregate the page-click data for repository id "university_nevada_las_vegas".
unlv_pc_2020 = process_repo("university_nevada_las_vegas")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [23]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
unlv_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/unlv_pc_2020.csv", header = True)

In [24]:
# Aggregate the page-click data for repository id "university_nebraska_lincoln".
unl_pc_2020 = process_repo("university_nebraska_lincoln")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [25]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
unl_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/unl_pc_2020.csv", header = True)

In [26]:
# Aggregate the page-click data for repository id "university_kentucky".
uken_pc_2020 = process_repo("university_kentucky")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [27]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
uken_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/uken_pc_2020.csv", header = True)

In [28]:
# Aggregate the page-click data for repository id "university_arizona".
uariz_pc_2020 = process_repo("university_arizona")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [29]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
uariz_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/uariz_pc_2020.csv", header = True)

In [30]:
# Aggregate the page-click data for repository id "swarthmore_college".
swarthmore_pc_2020 = process_repo("swarthmore_college")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [31]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
swarthmore_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/swarthmore_pc_2020.csv", header = True)

In [32]:
# Aggregate the page-click data for repository id "sam_houston_state_university".
samhouston_pc_2020 = process_repo("sam_houston_state_university")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [33]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
samhouston_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/samhouston_pc_2020.csv", header = True)

In [34]:
# Aggregate the page-click data for repository id "rutgers_university".
rutgers_pc_2020 = process_repo("rutgers_university")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [35]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
rutgers_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/rutgers_pc_2020.csv", header = True)

In [36]:
# Aggregate the page-click data for repository id "northeastern_university".
northeast_pc_2020 = process_repo("northeastern_university")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [37]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
northeast_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/northeast_pc_2020.csv", header = True)

In [38]:
# Aggregate the page-click data for repository id "university_new_mexico".
unm_pc_2020 = process_repo("university_new_mexico")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [39]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
unm_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/unm_pc_2020.csv", header = True)

In [40]:
# Aggregate the page-click data for repository id "northern_kentucky_university".
nky_pc_2020 = process_repo("northern_kentucky_university")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [41]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
nky_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/nky_pc_2020.csv", header = True)

In [42]:
# Aggregate the page-click data for repository id "university_michigan".
umich_pc_2020 = process_repo("university_michigan")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [43]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
umich_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/umich_pc_2020.csv", header = True)

In [44]:
# Aggregate the page-click data for repository id "maryland_drum".
umaryland_pc_2020 = process_repo("maryland_drum")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [45]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
umaryland_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/umaryland_pc_2020.csv", header = True)

In [46]:
# Aggregate the page-click data for repository id "kansas_state_university".
kansas_pc_2020 = process_repo("kansas_state_university")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [47]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
kansas_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/kansas_pc_2020.csv", header = True)

In [48]:
# Aggregate the page-click data for repository id "university_montana".
umontana_pc_2020 = process_repo("university_montana")

D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-01_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-02_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-03_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-04_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-05_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-06_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-07_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-08_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-09_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-10_RAMP_all_page-clicks.zip
D:/Documents/PycharmProjects/LEADING-RAMP/ir_data/Ramp2020\2020-11_RAMP_all_page

In [49]:
# Export the aggregated frame to CSV in the Export folder, writing the column header row.
umontana_pc_2020.to_csv("D://Documents//PycharmProjects//LEADING-RAMP//ir_data//Ramp2020//Export/umontana_pc_2020.csv", header = True)

It took about 2 hours to export these yearly IR files.