# Importing Walmart USA Stores

## 1. Load Source Data

In [37]:
#Libraries and Settings
import pandas as pd

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows',10)

In [38]:
#Data File to Dataframe
file='/Users/c32/Documents/NYCDSA/Projects/DATA/Walmart Stores/walmart_2018_11_06.csv'
df=pd.read_csv(file, converters={'zip_code': str})
df.shape

(4654, 23)

In [39]:
df.head(1)

Unnamed: 0,index,name,url,street_address,city,state,zip_code,country,phone_number_1,phone_number_2,fax_1,fax_2,email_1,email_2,website,open_hours,latitude,longitude,facebook,twitter,instagram,pinterest,youtube
0,0,Conway Supercenter,https://www.walmart.com/store/5/conway-ar/details,1155 Hwy 65 North,Conway,AR,72032,US,501-329-0023,,,,,,,"monday - friday : 00:00-24:00, saturday : 00:0...",35.10866,-92.436905,,,,,


## 2. Verify Columns

### 2.1. Zip

In [40]:
#Zip
#Check Lenght of Zip codes is correct
df['zip']=df['zip_code']
print('zip codes with less than 5 digits: ',df[df['zip'].str.len()!=5]['zip'].nunique())
#Because the zip code was stored as an int, it lost the leading 0s when they had them.

def check_and_fix_the_zeroes(z):
    if len(z)==5:
        return z
    if len(z)==4:
        return '0'+z
    if len(z)==3:
        return '00'+z
    
df['zip']=df['zip'].apply(check_and_fix_the_zeroes)
print('zip codes with less than 5 digits: ',df[df['zip'].str.len()!=5]['zip'].nunique())


zip codes with less than 5 digits:  221
zip codes with less than 5 digits:  0


### 2.2. Type of Store

In [41]:
# The type of store is in the name let's separate it
def extract_type_of_store(store):
    if store.find('Supercenter') != -1:
        return('Supercenter')
    if store.find('Neighborhood Market') != -1:
        return('Neighborhood Market')
    if store.find('Pickup only') != -1:
        return('Pickup only')
    if store.find('Pharmacy') != -1:
        return('Pharmacy')
    if store.find('Gas Station') != -1:
        return('Gas Station')
    if store.find('Store') != -1:
        return('Store')
    else:
        return('IDK')

df['store_type']=df['name'].apply(extract_type_of_store)

## 3.1. Choose Columns to Keep

In [42]:
walmart=df[['name','store_type','zip','latitude','longitude','street_address','open_hours','phone_number_1','url']]

## 4. Save the changes

In [43]:
# There is a Store in Frisco TX that has the zip code 75033 and this zipcode is relatively new and there is no previous data related to it.
# Also, the pyzip package is not up to the date of creation of this zip code and therefore does not have any information about adjacent zip codes.
# For viability reasons I am going to use the old zip code which serves better at getting the most informative results.
walmart[walmart['zip']=='75033']='75034'

#ZIP Code 32163 is located in The Villages Florida. Created in 2013. 
#Portions of 32163 are also in Wildwood and Fruitland Park. 32163 is primarily within Sumter County, with some portions in Lake County. Regionally, it is located in Metro Orlando.
#Reasigning zip code to the nearest zip Fruitland Park.
walmart[walmart['zip']=='32163']='34731'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  walmart[walmart['zip']=='75033']='75034'
  walmart[walmart['zip']=='75033']='75034'
  walmart[walmart['zip']=='75033']='75034'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  walmart[walmart['zip']=='32163']='34731'


In [44]:
#After all the changes, let's save in a csv file.

import os
outname = '1_Load_Walmart.csv'
outdir = '/Users/c32/Documents/NYCDSA/Projects/DATA/Ready_Data'
if not os.path.exists(outdir):
    os.mkdir(outdir)
fullname = os.path.join(outdir, outname)    

walmart.to_csv(fullname, header=True, index=False)
print("Saved!")

Saved!


In [45]:
walmart.head()

Unnamed: 0,name,store_type,zip,latitude,longitude,street_address,open_hours,phone_number_1,url
0,Conway Supercenter,Supercenter,72032,35.10866,-92.436905,1155 Hwy 65 North,"monday - friday : 00:00-24:00, saturday : 00:0...",501-329-0023,https://www.walmart.com/store/5/conway-ar/details
1,Sikeston Supercenter,Supercenter,63801,36.857394,-89.586051,1303 S Main St,"monday - friday : 00:00-24:00, saturday : 00:0...",573-472-3020,https://www.walmart.com/store/9/sikeston-mo/de...
2,Tahlequah Supercenter,Supercenter,74464,35.888765,-94.979859,2020 S Muskogee Ave,"monday - friday : 00:00-24:00, saturday : 00:0...",918-456-8804,https://www.walmart.com/store/10/tahlequah-ok/...
3,Mountain Home Supercenter,Supercenter,72653,36.354957,-92.341026,65 Wal Mart Dr,"monday - friday : 00:00-24:00, saturday : 00:0...",870-492-9299,https://www.walmart.com/store/11/mountain-home...
4,Claremore Supercenter,Supercenter,74017,36.293955,-95.627125,1500 S Lynn Riggs Blvd,"monday - friday : 00:00-24:00, saturday : 00:0...",918-341-2765,https://www.walmart.com/store/12/claremore-ok/...
