### Import libraries

In [1]:
import os

import folium
import numpy as np
import pandas as pd
import unicodecsv as csv
from googlegeocoder import GoogleGeocoder

### Data import

In [3]:
# Load the company table from the CSV file, decoding it as latin-1.
df = pd.read_csv('us_pharma_02_map_top.csv', encoding='latin-1')


### Data overview

In [4]:
# (rows, columns) of the loaded table.
df.shape

(308, 10)

In [5]:
# Number of missing values in each column.
df.isnull().sum()

company           0
city             42
state            29
full_address      0
zip_code         44
website          50
career           69
top             264
rank            264
AI              278
dtype: int64

In [6]:
# Preview the first five rows.
df.head()

Unnamed: 0,company,city,state,full_address,zip_code,website,career,top,rank,AI
0,Pfizer Inc.,New York,NY,"235 East 42nd Street New York, NY 10017",10017,http://www.pfizer.com/home/,www.pfizerrxpathways.com,yes,1.0,yes
1,Roche-genentech,,CA,1 DNA Way south San Francisco CA 94080,,,,yes,2.0,yes
2,Novartis Pharmaceuticals Corporation,Hanover,NJ,"One Health Plaza East Hanover, NJ 07936-1080",07936-1080,https://www.pharma.us.novartis.com/,https://www.pharma.us.novartis.com...,yes,3.0,yes
3,Johnson & Johnson Consumer Inc.,New Brunswick,NJ,"1 Johnson and Johnson Plaza, New Brunswick, NJ...",8933,http://www.jnj.com/,https://www.careers.jnj.com,yes,4.0,
4,Merck,kenilworth,NJ,"2000 Galloping Hill Road, Kenilworth, NJ 07033",,,,yes,5.0,yes


### Create a CSV file to store latitude and longitude

In [11]:
# Open the coordinate file for writing and emit the header row.
# Mode 'wb' (instead of append) starts from an empty file on every run, so
# re-running this cell cannot leave a second header row or coordinates from an
# earlier run in the file.
# The handle is intentionally left open: the geocoding loop further down writes
# the data rows through `writer` and closes `csv_file` when it is done.
csv_file = open('us_pharma_coordinate.csv', 'wb')
writer = csv.writer(csv_file, dialect='excel', delimiter=',', encoding='utf-8')
writer.writerow(['lat', 'lng'])

9

In [12]:
# Keep only the full-address column; these are the strings that get geocoded.
df_add = df.loc[:, 'full_address']

In [13]:
# Number of addresses to geocode.
len(df_add)

308

### Find latitude and longitude for each address

In [14]:
# Geocoding client. The API key is read from the GOOGLE_GEOCODER_KEY
# environment variable so a real key never has to be saved in the notebook;
# the original placeholder string is used when the variable is not set.
geocoder = GoogleGeocoder(os.environ.get("GOOGLE_GEOCODER_KEY", "your key"))

In [15]:
# Geocode every address and write exactly one coordinate row per address.
#
# Addresses that cannot be geocoded are collected in `no_lat`. For those an
# empty row is still written, so that row i of the coordinate file always
# belongs to row i of `df_add` (the two tables are later joined by position).
# Only `Exception` is caught, so interrupting the kernel still stops the loop,
# and the file is closed even if the loop is aborted.
no_lat = []
try:
    for k in df_add:
        try:
            search = geocoder.get(k)
            lat = search[0].geometry.location.lat
            lng = search[0].geometry.location.lng
            row = [lat, lng]
        except Exception:
            # Best effort: remember the failing address and keep going.
            no_lat.append(k)
            row = ['', '']
        writer.writerow(row)
finally:
    csv_file.close()



In [16]:
# Show the addresses for which geocoding failed.
# An empty list means every address was geocoded successfully.
no_lat

[]

In [35]:
# Read the coordinates back from us_pharma_coordinate.csv.
cordi = pd.read_csv('us_pharma_coordinate.csv')

In [36]:
# (rows, columns) of the coordinate table.
cordi.shape

(308, 2)

In [37]:
# Put the company table and the coordinates side by side.
# The join is positional (row i of df is paired with row i of cordi), so both
# tables must have the same number of rows; with differing lengths concat
# would silently pad the shorter table with NaN.
if len(cordi) != len(df):
    raise ValueError(
        "coordinate file has {} rows but the company table has {}; "
        "regenerate us_pharma_coordinate.csv before merging".format(len(cordi), len(df))
    )
df_map = pd.concat([df, cordi], axis=1)

In [38]:
# Preview the merged table; lat and lng are the two new columns.
df_map.head()

Unnamed: 0,company,city,state,full_address,zip_code,website,career,top,rank,AI,lat,lng
0,Pfizer Inc.,New York,NY,"235 East 42nd Street New York, NY 10017",10017,http://www.pfizer.com/home/,www.pfizerrxpathways.com,yes,1.0,yes,40.750302,-73.972657
1,Roche-genentech,,CA,1 DNA Way south San Francisco CA 94080,,,,yes,2.0,yes,37.656452,-122.377645
2,Novartis Pharmaceuticals Corporation,Hanover,NJ,"One Health Plaza East Hanover, NJ 07936-1080",07936-1080,https://www.pharma.us.novartis.com/,https://www.pharma.us.novartis.com...,yes,3.0,yes,40.808241,-74.388637
3,Johnson & Johnson Consumer Inc.,New Brunswick,NJ,"1 Johnson and Johnson Plaza, New Brunswick, NJ...",8933,http://www.jnj.com/,https://www.careers.jnj.com,yes,4.0,,40.499277,-74.442038
4,Merck,kenilworth,NJ,"2000 Galloping Hill Road, Kenilworth, NJ 07033",,,,yes,5.0,yes,40.67743,-74.272355


In [39]:
# Write the merged table to df_R.csv, leaving out the index column.
df_map.to_csv('df_R.csv', index=False)

In [40]:
# Work on an independent copy named `data` so df_map itself stays unchanged.
data = df_map.copy(deep=True)

In [41]:
# Column names of the working copy.
data.columns


Index(['company', 'city', 'state', 'full_address', 'zip_code', 'website',
       'career', 'top', 'rank', 'AI', 'lat', 'lng'],
      dtype='object')

In [42]:
# Dtype and non-null count of every column.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 308 entries, 0 to 307
Data columns (total 12 columns):
company         308 non-null object
city            266 non-null object
state           279 non-null object
full_address    308 non-null object
zip_code        264 non-null object
website         258 non-null object
career          239 non-null object
top             44 non-null object
rank            44 non-null float64
AI              30 non-null object
lat             308 non-null float64
lng             308 non-null float64
dtypes: float64(3), object(9)
memory usage: 29.0+ KB


In [43]:
# Replace missing values in 'top' with 'No'.
# Replace missing values in 'rank' with 'Not Ranked'.
# Replace missing values in 'AI' with 'No'.

In [44]:
# Normalise the 'top' flag: missing values become 'No' and 'yes' becomes 'Yes'.
# The result is assigned back to the column rather than calling
# replace(inplace=True) on the data['top'] selection: that form is chained
# in-place mutation, which does not update `data` under pandas Copy-on-Write.
repla_top = {np.nan: 'No', 'yes': 'Yes'}
data['top'] = data['top'].replace(repla_top)

In [45]:
# Normalise the 'AI' flag: missing values become 'No' and 'yes' becomes 'Yes'.
# The result is assigned back to the column rather than calling
# replace(inplace=True) on the data['AI'] selection: that form is chained
# in-place mutation, which does not update `data` under pandas Copy-on-Write.
repla_ai = {np.nan: 'No', 'yes': 'Yes'}
data['AI'] = data['AI'].replace(repla_ai)

In [46]:
# Label companies without a rank: missing values in 'rank' become 'Not Ranked'.
# The result is assigned back to the column rather than calling
# replace(inplace=True) on the data['rank'] selection: that form is chained
# in-place mutation, which does not update `data` under pandas Copy-on-Write.
repla_rank = {np.nan: 'Not Ranked'}
data['rank'] = data['rank'].replace(repla_rank)

### Make the map

In [48]:
# Pull the columns used for the markers out of `data` as plain Python lists.
lat = data["lat"].tolist()
lon = data["lng"].tolist()
company_name = data["company"].tolist()
address_full = data["full_address"].tolist()
ai = data["AI"].tolist()
top = data["top"].tolist()
rank = data["rank"].tolist()

# function for making different color

def color_producer(top, ai):
    """Return the marker colour for a company.

    Both arguments are compared against the string 'Yes'; any other value
    counts as "no".

    top and ai  -> "red"
    top only    -> "blue"
    ai only     -> "purple"
    neither     -> "green"
    """
    is_top = top == 'Yes'
    uses_ai = ai == 'Yes'
    palette = {
        (True, True): "red",
        (True, False): "blue",
        (False, True): "purple",
        (False, False): "green",
    }
    return palette[(is_top, uses_ai)]


# Base map with a fixed starting centre and zoom level.
# NOTE: the name `map` shadows the Python builtin; it is kept so that any code
# referring to this variable keeps working.
map = folium.Map(location=[35.199, -101.91], zoom_start=5)

fg = folium.FeatureGroup(name="My Map")

# One marker per company. The loop variable is called `uses_ai` so that the
# `ai` list is not overwritten by its last element.
for lt, ln, cn, tp, uses_ai, rn in zip(lat, lon, company_name, top, ai, rank):
    if pd.isna(lt) or pd.isna(ln):
        # A marker cannot be placed without coordinates.
        continue
    popup_text = "{} , Top_Pharma? {} ||Rank: {}  ||Use_AI? {}".format(cn, tp, rn, uses_ai)
    fg.add_child(folium.Marker(
        location=[lt, ln],
        popup=folium.Popup(popup_text),
        icon=folium.Icon(color=color_producer(tp, uses_ai)),
    ))

# Attach the feature group once, after all markers have been added
# (previously this ran on every loop iteration).
map.add_child(fg)

map.save("us_pharma_map.html")