# Classifications

[Legacy Classifications](http://nces.ed.gov/pubs2002/cip2000/xls/cip.zip)

(Also has legacy crosswalks)

[2010 Classifications](https://nces.ed.gov/ipeds/cipcode/Files/CIPCode2010.csv)

[2020 Classifications](https://nces.ed.gov/ipeds/cipcode/Files/CIPCode2020.csv)

# Crosswalks

[2000 to 2010](https://nces.ed.gov/ipeds/cipcode/Files/Crosswalk2000to2010.csv)

[2010 to 2020](https://nces.ed.gov/ipeds/cipcode/Files/Crosswalk2010to2020.csv)

In [1]:
import pandas as pd
import numpy as np
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen
from database.cip_history import CipCode

# import user_functions

def clean_string(text: str):
    """ strips Excel formula text and trailing periods """
    text = str(text)
    if text[:1] == '=':
        text = text[2:-1]
    if str(text).endswith('.'):
        text = text[:-1]
    return(text)

class CipFile:
    """ Class for a particular year's CIP assignments """
    def __init__(self, year: int):
        """ class constructor """
        self.year = year
        self.data = list()
        
        if self.year < 2010:
            with urlopen('http://nces.ed.gov/pubs2002/cip2000/xls/cip.zip') as resp:
                zipfile = ZipFile(BytesIO(resp.read()))
                file_name = zipfile.namelist()[0]

                with zipfile.open(file_name) as data_file:
                    df = pd.read_excel(
                        io = data_file,
                        sheet_name = f'CIP{self.year}',
                        index_col = None,
                        dtype = object)
            
            df = df.rename(columns = {
                'CIP85': 'cipcode',
                'CIP90': 'cipcode',
                'CIPDESCR': 'cipdefinition'
            })
            
            if self.year == 1985:
                df['cipdefinition'] = 'Unknown'
            elif self.year == 2000:
                df = df.query('CIPCode != "-----"')
                # df.cipcode = np.where(df.cipcode == '-----', df.reportunder, df.cipcode)

        else:
            df = pd.read_csv(
                f'https://nces.ed.gov/ipeds/cipcode/Files/CIPCode{year}.csv',
                dtype = object,
                index_col = None,
                low_memory = False,
                encoding = "iso-8859-1")
            
        df.columns = df.columns.str.strip().str.lower()
        
        cols = ['cipcode', 'cipfamily', 'ciptitle', 'cipdefinition']
        df = df[cols]
        df = df.applymap(clean_string)
        
        for row in df.itertuples(index=False):
            self.data.append(
                CipCode(
                    cipcode = row.cipcode,
                    version = self.year,
                    title = row.ciptitle,
                    description = row.cipdefinition,
                    family = row.cipfamily,
                    ))


    @property
    def year(self):
        """ return year value """
        return self.__year
    
    @year.setter
    def year(self, year: int):
        """ set year value """
        self.__year = year
        
    def __repr__(self):
        """ class repr method """
        return f'{self.__class__.__name__}(year={self.__year!r})'        

In [2]:
for year in [1985, 1990, 2000, 2010, 2020]:
    _cip = CipFile(year)
    print(f'CIP Codes for: {_cip.year}')
    print(f'{_cip.data[:5]}\n')


CIP Codes for: 1985
[CipCode(cipcode='00.0000', version=1985, title='Not Applicable', description='Unknown', family='00'), CipCode(cipcode='01.0101', version=1985, title='Agric Bus & Mgmt, Genl', description='Unknown', family='01'), CipCode(cipcode='01.0103', version=1985, title='Agricultural Economics', description='Unknown', family='01'), CipCode(cipcode='01.0199', version=1985, title='Agric Bus & Mgmt, Other', description='Unknown', family='01'), CipCode(cipcode='01.0201', version=1985, title='Agricul Mechanics, Genl', description='Unknown', family='01')]

CIP Codes for: 1990
[CipCode(cipcode='01.0000', version=1990, title='Agricultural Business and Production', description='A summary of groups of instructional programs that prepare individuals to apply scientific knowledge and methods, and techniques to agricultural business and production', family='01'), CipCode(cipcode='01.0100', version=1990, title='Agricultural Business & Management', description='A group of instructional progr

In [3]:
raw_data = pd.DataFrame()

for year in (2010, 2020):
    df = pd.read_csv(
        f'https://nces.ed.gov/ipeds/cipcode/Files/CIPCode{year}.csv',
        dtype = object)
    df['year'] = year
    print(f'{df.shape[0]} rows read.')
    raw_data = pd.concat([raw_data, df])

raw_data

2318 rows read.
2847 rows read.


Unnamed: 0,CIPFamily,CIPCode,Action,TextChange,CIPTitle,CIPDefinition,CrossReferences,Examples,year
0,"=""01""","=""01""",No substantive changes,no,"AGRICULTURE, AGRICULTURE OPERATIONS, AND RELAT...",Instructional programs that focus on agricultu...,,,2010
1,"=""01""","=""01.00""",No substantive changes,no,"Agriculture, General.",Instructional content is defined in code 01.0000.,,,2010
2,"=""01""","=""01.0000""",No substantive changes,no,"Agriculture, General.",A program that focuses on the general principl...,14.0301 - Agricultural Engineering.,,2010
3,"=""01""","=""01.01""",No substantive changes,no,Agricultural Business and Management.,Instructional content for this group of progra...,,,2010
4,"=""01""","=""01.0101""",No substantive changes,no,"Agricultural Business and Management, General.",A general program that focuses on modern busi...,,,2010
...,...,...,...,...,...,...,...,...,...
2842,"=""61""","=""61.2801""",Moved to,yes,Urology Residency Program.,A residency training program that prepares phy...,,,2020
2843,"=""61""","=""61.2802""",Moved to,yes,Pediatric Urology Fellowship Program.,A fellowship training program that prepares in...,,,2020
2844,"=""61""","=""61.2899""",New,no,"Urology Residency/Fellowship Programs, Other.",Any residency or fellowship program in urology...,,,2020
2845,"=""61""","=""61.99""",New,no,"Medical Residency/Fellowship Programs, Other.",Instructional content is defined in code 61.9999.,,,2020


In [5]:
cip = df.apply(lambda x: x.map(clean_string) if x.name in ['CIPCode', 'CIPFamily'] else x)

In [6]:
unique = cip[['CIPCode', 'year']].groupby(['CIPCode']).max()

unique

Unnamed: 0_level_0,year
CIPCode,Unnamed: 1_level_1
01,2020
01.00,2020
01.0000,2020
01.01,2020
01.0101,2020
...,...
61.2801,2020
61.2802,2020
61.2899,2020
61.99,2020


In [7]:
new_cip = cip.merge(
    right=unique,
    how = 'inner',
    on=['CIPCode', 'year']
)

new_cip

Unnamed: 0,CIPFamily,CIPCode,Action,TextChange,CIPTitle,CIPDefinition,CrossReferences,Examples,year
0,01,01,No substantive changes,yes,AGRICULTURAL/ANIMAL/PLANT/VETERINARY SCIENCE A...,Instructional programs that focus on agricultu...,,,2020
1,01,01.00,No substantive changes,no,"Agriculture, General.",Instructional content is defined in code 01.0000.,,,2020
2,01,01.0000,No substantive changes,no,"Agriculture, General.",A program that focuses on the general principl...,14.0301 - Agricultural Engineering.,,2020
3,01,01.01,No substantive changes,no,Agricultural Business and Management.,Instructional content for this group of progra...,,,2020
4,01,01.0101,No substantive changes,no,"Agricultural Business and Management, General.",A general program that focuses on modern busi...,,,2020
...,...,...,...,...,...,...,...,...,...
2842,61,61.2801,Moved to,yes,Urology Residency Program.,A residency training program that prepares phy...,,,2020
2843,61,61.2802,Moved to,yes,Pediatric Urology Fellowship Program.,A fellowship training program that prepares in...,,,2020
2844,61,61.2899,New,no,"Urology Residency/Fellowship Programs, Other.",Any residency or fellowship program in urology...,,,2020
2845,61,61.99,New,no,"Medical Residency/Fellowship Programs, Other.",Instructional content is defined in code 61.9999.,,,2020
