# Merge CallSign, CMA and Location

In [1]:
import pandas as pd
import csv
import re

## References

`re.`:
* https://medium.com/factory-mind/regex-tutorial-a-simple-cheatsheet-by-examples-649dc1c3f285
* https://docs.python.org/3/library/re.html
* https://docs.python.org/3/howto/regex.html#regex-howto
* https://www.journaldev.com/23763/python-remove-spaces-from-string

`csv.`:
* https://docs.python.org/3/library/csv.html#csv-fmt-params

## Create Table of CMA number, area name, state
* Note: CMA001 to CMA306 are MSAs (Metropolitan Statistical Areas)
* CMA307 onwards are RSAs (Rural Service Areas)

In [6]:
# Import the CMA dropdown list from CSV.
# Splitting on '-' puts the CMA code in the first field; the remaining fields
# (the description, which may itself contain '-') are re-joined in a later cell.
# newline='' is the form the csv module documents for file objects handed to
# csv.reader, so the reader performs its own line-ending handling.
with open("data-cache/cma-dropdown_list.csv", newline='') as f:
    reader = csv.reader(f, delimiter='-')
    m_dropdown = list(reader)

In [7]:
# Separate the CMA code (first field) from the description.
# The description may contain '-' itself, so every field after the first is
# glued back together with '-'. The first row of m_dropdown is skipped.
dropdown = [
    [fields[0].strip(), '-'.join(fields[1:]).strip()]
    for fields in m_dropdown[1:]
]
dropdown

[['CMA001', 'New York, NY-NJ/Nassau-Suffolk'],
 ['CMA002', 'Los Angeles-Long Beach/Anaheim'],
 ['CMA003', 'Chicago, IL'],
 ['CMA004', 'Philadelphia, PA'],
 ['CMA005', 'Detroit/Ann Arbor, MI'],
 ['CMA006', 'Boston-Lowell-Brockton-Lawrence-Haverhill, MA-NH'],
 ['CMA007', 'San Francisco-Oakland, CA'],
 ['CMA008', 'Washington, DC-MD-VA'],
 ['CMA009', 'Dallas-Fort Worth, TX'],
 ['CMA010', 'Houston, TX'],
 ['CMA011', 'St. Louis, MO-IL'],
 ['CMA012', 'Miami-Fort Lauderdale-Hollywood, FL'],
 ['CMA013', 'Pittsburgh, PA'],
 ['CMA014', 'Baltimore, MD'],
 ['CMA015', 'Minneapolis-St. Paul, MN-WI'],
 ['CMA016', 'Cleveland, OH'],
 ['CMA017', 'Atlanta, GA'],
 ['CMA018', 'San Diego, CA'],
 ['CMA019', 'Denver-Boulder, CO'],
 ['CMA020', 'Seattle-Everett, WA'],
 ['CMA021', 'Milwaukee, WI'],
 ['CMA022', 'Tampa-St. Petersburg, FL'],
 ['CMA023', 'Cincinnati, OH-KY-IN'],
 ['CMA024', 'Kansas City, MO-KS'],
 ['CMA025', 'Buffalo, NY'],
 ['CMA026', 'Phoenix, AZ'],
 ['CMA027', 'San Jose, CA'],
 ['CMA028', 'Indiana

In [8]:
# Sanity check: the first parsed entry, re-joined with ' - ', must equal the
# second line of the raw file once surrounding whitespace is stripped.

with open("data-cache/cma-dropdown_list.csv", "r") as f:
    raw_lines = f.readlines()

test_line = raw_lines[1].strip()
assert ' - '.join(dropdown[0]) == test_line

In [9]:
# Tag every row with its area type: rows at positions 0..305 get 'MSA',
# rows from position 306 onwards get 'RSA'.

for position, row in enumerate(dropdown):
    row.append('MSA' if position < 306 else 'RSA')

In [10]:
# Verify that no RSA description (rows from index 306 on) contains two
# consecutive capital letters, i.e. nothing that looks like a state code.
pattern_statecode = '[A-Z]{2}'

re_bool_statecode = [
    bool(re.search(pattern_statecode, row[1]))
    for row in dropdown[306:]
]

assert not any(re_bool_statecode)

In [11]:
# Append to every row the list of two-capital-letter matches (state codes)
# found in its description; rows without a match receive an empty list.

for row in dropdown:
    state_codes = re.findall(pattern_statecode, row[1])
    row.append(state_codes)

In [12]:
# Check that no MSA description (first 306 rows) contains a digit.

# Raw string: '\d' inside a plain string literal is an invalid escape sequence
# (DeprecationWarning since Python 3.6, SyntaxWarning from Python 3.12).
pattern_statenum = r'\d'
re_bool_statenum = []

for row in dropdown[:306]:
    string = row[1]
    re_bool_statenum.append(
        bool(re.search(pattern_statenum, string))
    )

assert sum(re_bool_statenum) == 0

In [13]:
# Extract [state name, RSA number, RSA name] from descriptions shaped like
# "<state> <number> - <name>". Rows that do not match receive an empty list.

# Raw string avoids invalid-escape warnings for \s, \d and \D.
# The first group accepts several space-separated words so multi-word state
# names are captured whole; with a bare [A-Za-z]+ the unanchored search kept
# only the last word before the number.
pattern = r'([A-Za-z]+(?: [A-Za-z]+)*)(?:\s)(\d+)(?: - )(\D+)'

for row in dropdown:
    string = row[1]
    m = re.search(pattern, string)
    # Always append exactly one element so every row keeps the same length.
    row.append(list(m.groups()) if m else [])


In [14]:
# TODO: flatten the last (nested-list) element of each row; not implemented yet

In [15]:
# Turn the list of rows into a DataFrame, one column per row element.
dropdown_cols = [
    'cma_code',
    'cma_dscr',
    'area_type',
    'msa_state',
    'rsa_dscr',
]
dropdown_df = pd.DataFrame(data=dropdown, columns=dropdown_cols)

In [17]:
# Prepend a 'CMA' column holding cma_code without its first three characters.

cma_numbers = dropdown_df['cma_code'].str[3:]
dropdown_df.insert(0, 'CMA', cma_numbers)

In [19]:
# Keep only the columns needed for the merge with the county table.
# The axis is named through `columns=`: passing it positionally
# (drop(labels, 1)) was deprecated in pandas 1.3 and is rejected by pandas 2.x.
dropdown_4merge = dropdown_df.drop(columns=['cma_code', 'msa_state', 'rsa_dscr'])

### join cma-cnty with cma-dropdown

In [20]:
# Load the CMA-to-county lookup table.
# Downloaded from https://www.cellularmaps.com/fcc_markets.shtml

cnty_xls_path = 'data-cache/cmacnty1990.xls'
cnty_df = pd.read_excel(cnty_xls_path)

In [21]:
# Convert CMA to a string zero-padded to width 3 so it can be used as the
# join key against the dropdown table's 'CMA' column.
cnty_df['CMA'] = cnty_df['CMA'].astype(str).str.zfill(3)

In [22]:
# Left merge: keep every county row and attach the dropdown description and
# area type via the shared 'CMA' key.
# `how='left'` is spelled out because DataFrame.merge defaults to an inner
# join, which would silently drop counties whose CMA has no dropdown entry.

cnty_dropdown = cnty_df.merge(dropdown_4merge, on='CMA', how='left')

In [23]:
# Summarise the merged county/CMA table.
# sort_values returns a new frame, and only the last expression of a cell is
# displayed, so intermediate results are bound to names instead of being
# computed and thrown away. cnty_dropdown itself is left in its original order.
cnty_dropdown_sorted = cnty_dropdown.sort_values(by='CMA')

# distinct values per column within each CMA (e.g. how many states per CMA)
gb_CMA = cnty_dropdown.groupby('CMA')
cma_summary = gb_CMA.nunique()

# distinct values per column within each state (e.g. how many CMAs per state)
gb_state = cnty_dropdown.groupby('State')
state_summary = gb_state.nunique()
state_summary

Unnamed: 0_level_0,County,State,FIPS,CMA,cma_dscr,area_type
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AK,25,1,25,4,4,2
AL,67,1,67,18,18,2
AR,75,1,75,18,18,2
AS,5,1,5,1,1,1
AZ,15,1,15,8,8,2
CA,58,1,58,31,31,2
CO,63,1,63,14,14,2
CT,8,1,8,6,6,2
DC,1,1,1,1,1,1
DE,3,1,3,2,2,2


## Merge dropdown & county info with scraped callsigns

In [26]:
# Load the scraped CMA/callsign table, reading every column as object dtype.
scrape_df = pd.read_csv("data-cache/cma_df.csv", dtype=object)
print(scrape_df.columns)

# Keep only rows whose radio_service is 'CL' (Cellular Licenses).
is_CL = scrape_df['radio_service'].eq('CL')
scrapeCL_df = scrape_df[is_CL]

# Prepend a 'CMA' column: cma_no without its first three characters.
cma_numbers = scrapeCL_df['cma_no'].map(lambda code: code[3:])
scrapeCL_df.insert(0, 'CMA', cma_numbers)

# Lookup table with just the join key and the callsign.
scrapeCL_4merge = scrapeCL_df.filter(items=['CMA', 'callsign_leaseID'])

Index(['cma_no', 'callsign_leaseID', 'FRN_name', 'FRN_no', 'radio_service',
       'status', 'date_expr'],
      dtype='object')


In [27]:
# Join the callsign lookup with the county/CMA table on the 'CMA' key.

callsigns_cma = pd.merge(scrapeCL_4merge, cnty_dropdown, on='CMA')

In [28]:
# Count distinct values per column within each CMA; the callsign_leaseID
# column gives the number of callsigns per CMA.
gb_callsigns_cma = callsigns_cma.groupby('CMA')
gb_callsigns_cma.nunique()

Unnamed: 0_level_0,CMA,callsign_leaseID,County,State,FIPS,cma_dscr,area_type
CMA,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
001,1,2,17,2,17,1,1
002,1,6,4,1,4,1,1
003,1,2,6,1,6,1,1
004,1,2,8,2,8,1,1
005,1,2,7,1,7,1,1
006,1,3,6,2,6,1,1
007,1,3,5,1,5,1,1
008,1,2,13,3,13,1,1
009,1,2,11,1,11,1,1
010,1,2,6,1,6,1,1


In [30]:
# Preview the first 20 rows of the merged table.
callsigns_cma.iloc[:20]

Unnamed: 0,CMA,callsign_leaseID,County,State,FIPS,cma_dscr,area_type
0,1,KNKA206,Bergen,NJ,34003,"New York, NY-NJ/Nassau-Suffolk",MSA
1,1,KNKA206,Essex,NJ,34013,"New York, NY-NJ/Nassau-Suffolk",MSA
2,1,KNKA206,Hudson,NJ,34017,"New York, NY-NJ/Nassau-Suffolk",MSA
3,1,KNKA206,Morris,NJ,34027,"New York, NY-NJ/Nassau-Suffolk",MSA
4,1,KNKA206,Passaic,NJ,34031,"New York, NY-NJ/Nassau-Suffolk",MSA
5,1,KNKA206,Somerset,NJ,34035,"New York, NY-NJ/Nassau-Suffolk",MSA
6,1,KNKA206,Union,NJ,34039,"New York, NY-NJ/Nassau-Suffolk",MSA
7,1,KNKA206,Bronx,NY,36005,"New York, NY-NJ/Nassau-Suffolk",MSA
8,1,KNKA206,Kings,NY,36047,"New York, NY-NJ/Nassau-Suffolk",MSA
9,1,KNKA206,Nassau,NY,36059,"New York, NY-NJ/Nassau-Suffolk",MSA
