# Importing Costco USA Stores

## 1. Load Source Data

In [29]:
#Libraries and Settings
import pandas as pd

# Display settings: show every column, but cap the number of printed rows at 10.
pd.options.display.max_columns = None
pd.options.display.max_rows = 10

In [30]:
# Read the source Excel workbook into a DataFrame.
file = '/Users/c32/Documents/NYCDSA/Projects/DATA/Costco Stores/costco-feb-2024.xlsx'
df = pd.read_excel(io=file)
df.shape  # evaluated but not displayed: only the cell's last expression is rendered
df.dtypes

Store Name    object
Address       object
City          object
State         object
Zipcode       object
Phone         object
URL           object
dtype: object

In [31]:
# Preview the first five rows of the loaded data.
df.head(n=5)

Unnamed: 0,Store Name,Address,City,State,Zipcode,Phone,URL
0,Anchorage,330 W DIMOND BLVD,ANCHORAGE,AK,99515-1903,(907) 349-2335,https://www.costco.com/warehouse-locations/anc...
1,Anchorage Business Center,1074 N MULDOON RD,ANCHORAGE,AK,99504-2073,(907) 416-9780,https://www.costco.com/warehouse-locations/anc...
2,Fairbanks,48 COLLEGE RD,FAIRBANKS,AK,99701-1706,(907) 205-3607,https://www.costco.com/warehouse-locations/fai...
3,Juneau,5225 COMMERCIAL BLVD,JUNEAU,AK,99801-7210,(907) 780-6740,https://www.costco.com/warehouse-locations/jun...
4,NE Anchorage,4125 DEBARR RD,ANCHORAGE,AK,99508-3115,(907) 269-9510,https://www.costco.com/warehouse-locations/ne-...


## 2. Split the ZIP code and keep only the 5-digit part

In [32]:
# Scratch check on one sample value: split at the hyphen and keep the first piece.
z = '99515-1903'
z.find('-')  # index of the hyphen; evaluated but not displayed
s = z.split(sep='-')
s[0]

'99515'

In [33]:
def check_and_fix_the_zeroes(z):
    """Left-pad a ZIP code string with zeros so it is at least five characters long.

    Parameters
    ----------
    z : str
        ZIP code text, possibly missing its leading zeros.

    Returns
    -------
    str
        ``z`` padded on the left with ``'0'`` up to five characters.
        Strings that already have five or more characters are returned
        unchanged.

    Notes
    -----
    Every input length now yields a string. Previously only lengths 3, 4
    and 5 were handled and any other length silently returned ``None``.
    """
    return z.zfill(5)
    
def take_5_digits(z):
    """Return the text before the first hyphen in ``z``, passed through ``check_and_fix_the_zeroes``.

    Parameters
    ----------
    z : str
        ZIP code text, either plain (``'99515'``) or hyphenated (``'99515-1903'``).

    Returns
    -------
    Whatever ``check_and_fix_the_zeroes`` returns for the leading part.
    """
    # partition yields the whole string as the head when there is no hyphen.
    leading_part, _, _ = z.partition('-')
    return check_and_fix_the_zeroes(leading_part)
          
# Build the 'zip' column from the raw values (cast to text first), then remove the raw column.
df['zip'] = df['Zipcode'].astype(str).apply(take_5_digits)

del df['Zipcode']



## 3. Save the changes

### 3.1 Remove some columns to avoid redundant information and conflicts later

In [34]:
# Remove the State and City columns in a single in-place call.
df.drop(columns=['State', 'City'], inplace=True)

In [35]:
# One store in Frisco, TX has the zip code 75033. This zip code is relatively new, so there is no previous data related to it.
# Also, the pyzip package predates the creation of this zip code and therefore has no information about its adjacent zip codes.
# For viability reasons I am going to use the old zip code, which gives the most informative results.
# Google gives this store the old zip code as well, which is one more reason to correct it.
#
# Use .loc with an explicit column so that only 'zip' is rewritten. Assigning through
# df[mask] = value would overwrite EVERY column of the matching rows with '75034'.
df.loc[df['zip'] == '75033', 'zip'] = '75034'

In [36]:
# After all the changes, save the result to a CSV file.

import os
outname = '1_Load_Costco.csv'
outdir = '/Users/c32/Documents/NYCDSA/Projects/DATA/Ready_Data'
# makedirs also creates any missing parent folders, and exist_ok=True makes the call
# a no-op when the folder is already there (no separate existence check needed).
os.makedirs(outdir, exist_ok=True)
fullname = os.path.join(outdir, outname)

# index=False keeps the row index out of the file; header=True writes the column names.
df.to_csv(fullname, header=True, index=False)
print("Saved!")

Saved!


In [37]:
# Preview the first five rows of the frame as it stands at this point.
df.head(n=5)

Unnamed: 0,Store Name,Address,Phone,URL,zip
0,Anchorage,330 W DIMOND BLVD,(907) 349-2335,https://www.costco.com/warehouse-locations/anc...,99515
1,Anchorage Business Center,1074 N MULDOON RD,(907) 416-9780,https://www.costco.com/warehouse-locations/anc...,99504
2,Fairbanks,48 COLLEGE RD,(907) 205-3607,https://www.costco.com/warehouse-locations/fai...,99701
3,Juneau,5225 COMMERCIAL BLVD,(907) 780-6740,https://www.costco.com/warehouse-locations/jun...,99801
4,NE Anchorage,4125 DEBARR RD,(907) 269-9510,https://www.costco.com/warehouse-locations/ne-...,99508
