# List all zones that fall under a country

In [1]:
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.wait import WebDriverWait

from paths import berkeley_earth_dataset_url
from scrapers import prepare_firefox_driver
import pandas as pd
import numpy as np

# Empty accumulator that the scraping cells fill with one [Country, Division, Zone] row per zone file.
zone_data = pd.DataFrame(columns=['Country','Division','Zone'])

# Country names; each one is appended to berkeley_earth_dataset_url to form that country's listing URL.
countries = ["Bangladesh","India","Myanmar","Nepal","Bhutan"]

In [2]:
driver = prepare_firefox_driver()

In [6]:
country = countries[0]

In [3]:
# Open the directory listing of the current country and collect its division links.
berkeleyearth_data_url_country = berkeley_earth_dataset_url + country + "/"
driver.get(berkeleyearth_data_url_country)
# `find_elements_by_tag_name` was removed in Selenium 4.3; the `By` locator form
# works on both older and current Selenium releases.
links = driver.find_elements(By.TAG_NAME, "a")
# Read each link text once, then keep the entries ending in "/" as divisions.
divisions = [text for text in (link.text for link in links) if text.endswith("/")]
divisions

NameError: name 'country' is not defined

In [44]:
# Open the listing of the first division and collect the zone files it contains.
division = divisions[0]
berkeleyearth_data_url_country_area = berkeleyearth_data_url_country + division

driver.get(berkeleyearth_data_url_country_area)
# `find_elements_by_tag_name` was removed in Selenium 4.3; the `By` locator form
# works on both older and current Selenium releases.
links = driver.find_elements(By.TAG_NAME, "a")
# Read each link text once; links whose text ends in "txt" are the zone files.
candidate_zones = [text for text in (link.text for link in links) if text.endswith("txt")]
candidate_zones

['Barisal.txt', 'Bhola.txt', 'Pirojpur.txt']

In [46]:
zone = candidate_zones[0]

In [50]:
# Append one row, using the current row count as the new index label.
# NOTE(review): assumes zone_data has a default 0..n-1 index; with any other index the
# label len(zone_data) could already exist and the row would be overwritten - confirm.
zone_data_length = len(zone_data)
# division[:-1] drops the last character of the division link text (the trailing "/");
# zone[:-4] drops the last four characters of the file name (the ".txt" suffix).
zone_data.loc[zone_data_length] = [country,division[:-1],zone[:-4]]

In [51]:
zone_data

Unnamed: 0,Country,Division,Zone
0,Bangladesh,Barisal,Barisal


In [49]:
[country,division[:-1],zone[:-4]]

['Bangladesh', 'Barisal', 'Barisal']

In [4]:
# Walk country -> division -> zone over the directory listings and append one
# [Country, Division, Zone] row to zone_data per zone file.
# Only the first two entries of `countries` are visited by this cell.
for country in countries[:2]:
    berkeleyearth_data_url_country = berkeley_earth_dataset_url + country + "/"
    driver.get(berkeleyearth_data_url_country)
    # `find_elements_by_tag_name` was removed in Selenium 4.3; use the `By` locator form.
    links = driver.find_elements(By.TAG_NAME, "a")
    # Link texts are read (once each) before navigating away from the page.
    divisions = [text for text in (link.text for link in links) if text.endswith("/")]

    for division in divisions:
        berkeleyearth_data_url_country_area = berkeleyearth_data_url_country + division

        driver.get(berkeleyearth_data_url_country_area)
        links = driver.find_elements(By.TAG_NAME, "a")
        candidate_zones = [text for text in (link.text for link in links) if text.endswith("txt")]

        for zone in candidate_zones:
            # The current row count is used as the label of the appended row.
            zone_data_length = len(zone_data)
            # Strip the trailing "/" from the division and the ".txt" suffix from the zone.
            zone_data.loc[zone_data_length] = [country, division[:-1], zone[:-4]]


In [5]:
zone_data

Unnamed: 0,Country,Division,Zone
0,Bangladesh,Barisal,Barisal
1,Bangladesh,Barisal,Bhola
2,Bangladesh,Barisal,Pirojpur
3,Bangladesh,Chittagong,Chittagong
4,Bangladesh,Chittagong,Comilla
...,...,...,...
944,India,West_Bengal,Shrirampur
945,India,West_Bengal,Shyamnagar
946,India,West_Bengal,Siuri
947,India,West_Bengal,Titagarh


In [53]:
zone_data.to_csv('bd_and_neighbours_full.csv',index=False)

In [32]:
zone_data = pd.read_csv('bd_and_neighbours.csv')
zone_data

Unnamed: 0,Country,Division,Zone
0,India,Assam,Bongaigaon
1,India,Assam,Dhuburi
2,India,Assam,Dibrugarh
3,India,Assam,Diphu
4,India,Assam,Goalpara
...,...,...,...
500,Myanmar,Tanintharyi,Myeik
501,Myanmar,Yangon,Kanbe
502,Myanmar,Yangon,Syriam
503,Myanmar,Yangon,Thongwa


# Removing area-general data to keep only zone-specific data

In [9]:
zone_data[zone_data.Division==zone_data.Zone]

Unnamed: 0,Country,Division,Zone
0,Bangladesh,Barisal,Barisal
3,Bangladesh,Chittagong,Chittagong
17,Bangladesh,Dhaka,Dhaka
30,Bangladesh,Khulna,Khulna
45,Bangladesh,Rajshahi,Rajshahi
55,Bangladesh,Sylhet,Sylhet
56,India,Andaman_and_Nicobar,Andaman_and_Nicobar
61,India,Andhra_Pradesh,Andhra_Pradesh
117,India,Arunachal_Pradesh,Arunachal_Pradesh
118,India,Assam,Assam


In [10]:
# Flag the India rows whose Zone repeats the Division name (the area-general entries)
# and keep everything else; rows of other countries are never dropped here.
india_area_general = (zone_data.Country == 'India') & (zone_data.Division == zone_data.Zone)
zone_data_clean = zone_data[~india_area_general]
zone_data_clean

Unnamed: 0,Country,Division,Zone
0,Bangladesh,Barisal,Barisal
1,Bangladesh,Barisal,Bhola
2,Bangladesh,Barisal,Pirojpur
3,Bangladesh,Chittagong,Chittagong
4,Bangladesh,Chittagong,Comilla
...,...,...,...
943,India,West_Bengal,Shiliguri
944,India,West_Bengal,Shrirampur
945,India,West_Bengal,Shyamnagar
946,India,West_Bengal,Siuri


# India has around 900 stations across 29 areas, so we sample 3 stations from each state and discard the states that have fewer than 3 stations

In [11]:
zone_data_clean.Country.value_counts()

India         858
Bangladesh     56
Name: Country, dtype: int64

In [12]:
# Split the cleaned table into the India rows and all remaining rows,
# then count how many zones each Indian division has.
zone_data_clean_india = zone_data_clean.loc[zone_data_clean.Country.eq('India')]
zone_data_clean_not_india = zone_data_clean.loc[zone_data_clean.Country.ne('India')]
division_value_counts_india = zone_data_clean_india['Division'].value_counts()
division_value_counts_india

Uttar_Pradesh       107
Maharashtra          86
West_Bengal          75
Tamil_Nadu           72
Andhra_Pradesh       59
Karnataka            55
Gujarat              53
Madhya_Pradesh       50
Rajasthan            48
Bihar                41
Telangana            39
Punjab               31
Haryana              27
Kerala               23
Odisha               19
Jharkhand            16
Assam                13
Chhattisgarh         12
Uttarakhand           7
Kashmir               7
Nagaland              3
Goa                   3
NCT                   3
Meghalaya             2
Pondicherry           2
Mizoram               2
Manipur               1
Tripura               1
Himachal_Pradesh      1
Name: Division, dtype: int64

# Selection Criteria

## Random Selection

In [None]:
# zone_data_clean_india.groupby('Division').apply(lambda x: x.sample(frac=.1) if len(x)>10 else x)

In [None]:
# Divisions with fewer than 3 zones; they cannot supply a 3-zone sample.
infrequent_division_value_counts_india = division_value_counts_india.loc[division_value_counts_india.lt(3)]

In [20]:
infrequent_division_value_counts_india

Pondicherry         2
Mizoram             2
Meghalaya           2
Tripura             1
Himachal_Pradesh    1
Manipur             1
Name: Division, dtype: int64

In [47]:
# Drop the rows that belong to an infrequent division, then draw 3 random zones from
# each remaining division.
# The row mask is built from the Division column. Calling `isin` on the whole frame
# yields a frame-shaped mask, and indexing with it blanks the matching cells to NaN
# instead of removing rows (the result was only right because groupby skips NaN keys).
infrequent_divisions = infrequent_division_value_counts_india.index
zone_data_clean_india_frequent = zone_data_clean_india[~zone_data_clean_india.Division.isin(infrequent_divisions)]
# droplevel(0) removes the Division level that groupby.apply adds to the index,
# leaving the original row labels.
zone_data_clean_india_frequent_sample = zone_data_clean_india_frequent.groupby('Division').apply(lambda x: x.sample(3)).droplevel(0)
zone_data_clean_india_frequent_sample

Unnamed: 0,Country,Division,Zone
89,India,Andhra_Pradesh,Nandyal
59,India,Andhra_Pradesh,Anakapalle
114,India,Andhra_Pradesh,Vinukonda
121,India,Assam,Dibrugarh
119,India,Assam,Bongaigaon
...,...,...,...
871,India,Uttarakhand,Roorkee
869,India,Uttarakhand,Ramnagar
896,India,West_Bengal,Dhulian
908,India,West_Bengal,Islampur


In [34]:
zone_data_clean_not_india

Unnamed: 0,Country,Division,Zone
0,Bangladesh,Barisal,Barisal
1,Bangladesh,Barisal,Bhola
2,Bangladesh,Barisal,Pirojpur
3,Bangladesh,Chittagong,Chittagong
4,Bangladesh,Chittagong,Comilla
...,...,...,...
1007,Nepal,Western_Region,Butwal
1008,Nepal,Western_Region,Pokhara
1009,Nepal,Western_Region,Siddharthanagar
1010,Bhutan,Thimphu,Lungtenzampa


In [None]:
# Stack the non-India rows on top of the random India sample and save the result without the index.
sample_parts = [zone_data_clean_not_india, zone_data_clean_india_frequent_sample]
test_sample_data = pd.concat(sample_parts)
test_sample_data.to_csv('bd_and_neighbours_india_random.csv', index=False)

## Selection Based on the Northern Pollution-Prone States of India

In [1]:
# Division names used to filter the India rows (matched against the Division column with isin).
india_states_selected = ['Uttar_Pradesh','West_Bengal','Madhya_Pradesh','Bihar',
                         'Telangana','Haryana','Odisha','Jharkhand',
                         'Chhattisgarh','NCT']

In [2]:
# India: keep only the rows whose division is in the hand-picked state list.
in_selected_state = zone_data_clean_india.Division.isin(india_states_selected)
zone_data_clean_india_final = zone_data_clean_india.loc[in_selected_state]

# Other countries: keep only the Bangladesh and Myanmar rows.
in_bd_or_myanmar = zone_data_clean_not_india.Country.isin(["Bangladesh","Myanmar"])
zone_data_clean_bangladesh_and_myanmar_final = zone_data_clean_not_india.loc[in_bd_or_myanmar]
zone_data_clean_india_final

NameError: name 'zone_data_clean_india' is not defined

# Finalizing Dataset

In [15]:
# Final selection: the filtered India rows followed by the Bangladesh and Myanmar rows.
final_parts = [zone_data_clean_india_final, zone_data_clean_bangladesh_and_myanmar_final]
test_sample_data = pd.concat(final_parts)
test_sample_data

Unnamed: 0,Country,Division,Zone
119,India,Assam,Bongaigaon
120,India,Assam,Dhuburi
121,India,Assam,Dibrugarh
122,India,Assam,Diphu
123,India,Assam,Goalpara
...,...,...,...
51,Bangladesh,Rangpur_Division,Rangpur
52,Bangladesh,Rangpur_Division,Thakurgaon
53,Bangladesh,Sylhet,Habiganj
54,Bangladesh,Sylhet,Maulavi_Bazar


In [16]:
test_sample_data.to_csv('bd_and_neighbours.csv',index=False)

# File Validation

In [27]:
from paths import raw_data_path
from data_preparation import get_common_id
import os

In [24]:
# Directory to validate: raw_data_path with the value of get_common_id() appended by plain string concatenation.
raw_file_path = raw_data_path+get_common_id()
# File names found in that directory, wrapped in a Series so the vectorised .str accessor can be used on them.
files = pd.Series(os.listdir(raw_file_path))

In [20]:
# Turn file names into zone names: drop the last four characters (the extension),
# then replace every '-' with a space.
# NOTE(review): zone names elsewhere in this notebook keep their hyphen (e.g. 'Hpa-an'),
# so names altered by this replacement may not match zone_data.Zone - confirm it is intended.
names_without_extension = files.str.slice(stop=-4)
candidate_zones = names_without_extension.str.replace('-', ' ')
candidate_zones

0             Maubin
1           Myingyan
2               Agra
3           Adilabad
4      Jumri_Tilaiya
           ...      
494         Lalitpur
495           Itarsi
496         Raniganj
497            Satna
498    Greater_Noida
Length: 499, dtype: object

In [34]:
zone_data[zone_data.Zone.isin(candidate_zones)]

Unnamed: 0,Country,Division,Zone
0,India,Assam,Bongaigaon
1,India,Assam,Dhuburi
2,India,Assam,Dibrugarh
3,India,Assam,Diphu
4,India,Assam,Goalpara
...,...,...,...
500,Myanmar,Tanintharyi,Myeik
501,Myanmar,Yangon,Kanbe
502,Myanmar,Yangon,Syriam
503,Myanmar,Yangon,Thongwa


In [47]:
zone_data[zone_data.Zone.duplicated(keep=False)]

Unnamed: 0,Country,Division,Zone
17,India,Bihar,Bangaon
33,India,Bihar,Jamalpur
64,India,Chhattisgarh,Raipur
74,India,Haryana,Gorakhpur
250,India,Uttar_Pradesh,Faridpur
261,India,Uttar_Pradesh,Gorakhpur
299,India,Uttar_Pradesh,Nawabganj
336,India,West_Bengal,Bangaon
414,Bangladesh,Chittagong,Raipur
421,Bangladesh,Dhaka,Faridpur


# Selecting files subset

In [47]:
import os
from paths import berkeley_earth_data

# Folder with the downloaded files for Bangladesh and its neighbours, and the names of the files in it.
raw_path = f"{berkeley_earth_data}raw/bd_and_neighbours"
all_zone = os.listdir(raw_path)
len(all_zone)

505

In [48]:
# Expected file name of every selected zone: "<Division>_<Zone>.txt".
expected_file_names = test_sample_data.Division.add('_').add(test_sample_data.Zone).add('.txt')
candidate_zones = expected_file_names.tolist()
candidate_zones

['Assam_Bongaigaon.txt',
 'Assam_Dhuburi.txt',
 'Assam_Dibrugarh.txt',
 'Assam_Diphu.txt',
 'Assam_Goalpara.txt',
 'Assam_Guwahati.txt',
 'Assam_Jorhat.txt',
 'Assam_Karimganj.txt',
 'Assam_North_Lakhimpur.txt',
 'Assam_Sibsagar.txt',
 'Assam_Silchar.txt',
 'Assam_Tezpur.txt',
 'Assam_Tinsukia.txt',
 'Bihar_Araria.txt',
 'Bihar_Arrah.txt',
 'Bihar_Aurangabad.txt',
 'Bihar_Bagaha.txt',
 'Bihar_Bangaon.txt',
 'Bihar_Baruni.txt',
 'Bihar_Begusarai.txt',
 'Bihar_Bettiah.txt',
 'Bihar_Bhagalpur.txt',
 'Bihar_Bihar_Sharif.txt',
 'Bihar_Buxar.txt',
 'Bihar_Chapra.txt',
 'Bihar_Darbhanga.txt',
 'Bihar_Dehri.txt',
 'Bihar_Dinapore.txt',
 'Bihar_Dumraon.txt',
 'Bihar_Gaya.txt',
 'Bihar_Gopalganj.txt',
 'Bihar_Hajipur.txt',
 'Bihar_Jahanabad.txt',
 'Bihar_Jamalpur.txt',
 'Bihar_Jamui.txt',
 'Bihar_Katihar.txt',
 'Bihar_Khagaul.txt',
 'Bihar_Kishanganj.txt',
 'Bihar_Luckeesarai.txt',
 'Bihar_Madhipura.txt',
 'Bihar_Madhubani.txt',
 'Bihar_Masaurhi_Buzurg.txt',
 'Bihar_Mokameh.txt',
 'Bihar_Monghyr

In [49]:
# Files present on disk that are not part of the selected sample, in directory-listing order.
# Membership is tested against a set so each lookup is O(1) instead of rescanning the
# candidate list for every file.
selected_file_names = set(candidate_zones)
del_zones = [del_zone for del_zone in all_zone if del_zone not in selected_file_names]
del_zones

['Ayeyarwady_Kyaiklat.txt',
 'Yangon_Thongwa.txt',
 'Magway_Taungdwingyi.txt',
 'Tanintharyi_Myeik.txt',
 'Magway_Minbu.txt',
 'Mandalay_Meiktila.txt',
 'Shan_Lashio.txt',
 'Magway_Thayetmyo.txt',
 'Magway_Yenangyaung.txt',
 'Kayin_Hpa-an.txt',
 'Bago_Tharyarwady.txt',
 'Sagain_Monywa.txt',
 'Bago_Bago.txt',
 'Sagain_Sagaing.txt',
 'Shan_Taunggyi.txt',
 'Kachin_Myitkyina.txt',
 'Mandalay_Mogok.txt',
 'Shan_Tachilek.txt',
 'Ayeyarwady_Pathein.txt',
 'Mon_Mawlamyine.txt',
 'Magway_Chauk.txt',
 'Rakhine_Sittwe.txt',
 'Mandalay_Myingyan.txt',
 'Ayeyarwady_Maubin.txt',
 'Mandalay_Yamethin.txt',
 'Yangon_Kanbe.txt',
 'Yangon_Syriam.txt',
 'Tanintharyi_Dawei.txt',
 'Ayeyarwady_Bogale.txt',
 'Magway_Myaydo.txt',
 'Sagain_Shwebo.txt',
 'Mon_Thaton.txt',
 'Yangon_Yangon.txt',
 'Magway_Magway.txt',
 'Nay_Pyi_Taw_Nay_Pyi_Taw.txt',
 'Mandalay_Kyaukse.txt',
 'Ayeyarwady_Pyapon.txt',
 'Bago_Nyaunglebin.txt',
 'Nay_Pyi_Taw_Pyinmana.txt',
 'Mandalay_Mandalay.txt',
 'Mandalay_Pyin_Oo_Lwin.txt',
 'Magway

In [58]:
# Dry run: print the full path of every file that is not in the selection.
# The actual deletion is left disabled.
for del_zone in del_zones:
    del_zone_path = f"{raw_path}/{del_zone}"
    print(del_zone_path)
    # os.remove(del_zone_path)

/home/asif/Data/Dataset/AQ Dataset/Berkeley Earth Data/raw/bd_and_neighbours/Ayeyarwady_Kyaiklat.txt
/home/asif/Data/Dataset/AQ Dataset/Berkeley Earth Data/raw/bd_and_neighbours/Yangon_Thongwa.txt
/home/asif/Data/Dataset/AQ Dataset/Berkeley Earth Data/raw/bd_and_neighbours/Magway_Taungdwingyi.txt
/home/asif/Data/Dataset/AQ Dataset/Berkeley Earth Data/raw/bd_and_neighbours/Tanintharyi_Myeik.txt
/home/asif/Data/Dataset/AQ Dataset/Berkeley Earth Data/raw/bd_and_neighbours/Magway_Minbu.txt
/home/asif/Data/Dataset/AQ Dataset/Berkeley Earth Data/raw/bd_and_neighbours/Mandalay_Meiktila.txt
/home/asif/Data/Dataset/AQ Dataset/Berkeley Earth Data/raw/bd_and_neighbours/Shan_Lashio.txt
/home/asif/Data/Dataset/AQ Dataset/Berkeley Earth Data/raw/bd_and_neighbours/Magway_Thayetmyo.txt
/home/asif/Data/Dataset/AQ Dataset/Berkeley Earth Data/raw/bd_and_neighbours/Magway_Yenangyaung.txt
/home/asif/Data/Dataset/AQ Dataset/Berkeley Earth Data/raw/bd_and_neighbours/Kayin_Hpa-an.txt
/home/asif/Data/Dataset/A

# Collecting URLs for Western States of USA

In [9]:
# State names; each one is appended to the United_States listing URL to reach that state's zone files.
us_states = ["Washington","Oregon","California","Idaho","Nevada","Montana","Wyoming","Utah","Arizona","Colorado","New_Mexico","Texas"]
# Country path segment of the listing URL; also stored in the Country column of every scraped row.
country = 'United_States'

In [10]:
# Build the listing URL for the selected country and load it in the browser.
berkeleyearth_data_url_country = f"{berkeley_earth_dataset_url}{country}/"
driver.get(berkeleyearth_data_url_country)

In [11]:
# Open the listing of the first state and collect the zone files it contains.
division = us_states[0]
berkeleyearth_data_url_country_area = berkeleyearth_data_url_country + division

driver.get(berkeleyearth_data_url_country_area)
# `find_elements_by_tag_name` was removed in Selenium 4.3; the `By` locator form
# works on both older and current Selenium releases.
links = driver.find_elements(By.TAG_NAME, "a")
# Read each link text once; links whose text ends in "txt" are the zone files.
candidate_zones = [text for text in (link.text for link in links) if text.endswith("txt")]
candidate_zones

['Auburn.txt',
 'Bellevue.txt',
 'Bellingham.txt',
 'Burien.txt',
 'Everett.txt',
 'Federal_Way.txt',
 'Kennewick.txt',
 'Kent.txt',
 'Kirkland.txt',
 'Lakewood.txt',
 'Marysville.txt',
 'Olympia.txt',
 'Pasco.txt',
 'Redmond.txt',
 'Renton.txt',
 'Richland.txt',
 'Sammamish.txt',
 'Seattle.txt',
 'Shoreline.txt',
 'South_Hill.txt',
 'Spokane.txt',
 'Spokane_Valley.txt',
 'Tacoma.txt',
 'Vancouver.txt',
 'Washington.txt',
 'Yakima.txt']

In [12]:
# Visit every state listing and append one [Country, Division, Zone] row to zone_data per zone file.
# us_states is not modified inside the loop, so it is iterated directly rather than through a copy.
for division in us_states:
    berkeleyearth_data_url_country_area = berkeleyearth_data_url_country + division

    driver.get(berkeleyearth_data_url_country_area)
    # `find_elements_by_tag_name` was removed in Selenium 4.3; use the `By` locator form.
    links = driver.find_elements(By.TAG_NAME, "a")
    # Link texts are read once each; entries ending in "txt" are the zone files.
    candidate_zones = [text for text in (link.text for link in links) if text.endswith("txt")]

    for zone in candidate_zones:
        # The current row count is used as the label of the appended row.
        zone_data_length = len(zone_data)
        # zone[:-4] strips the ".txt" suffix; the same row is printed and stored.
        row = [country, division, zone[:-4]]
        print(row)
        zone_data.loc[zone_data_length] = row

['United_States', 'Washington', 'Auburn']
['United_States', 'Washington', 'Bellevue']
['United_States', 'Washington', 'Bellingham']
['United_States', 'Washington', 'Burien']
['United_States', 'Washington', 'Everett']
['United_States', 'Washington', 'Federal_Way']
['United_States', 'Washington', 'Kennewick']
['United_States', 'Washington', 'Kent']
['United_States', 'Washington', 'Kirkland']
['United_States', 'Washington', 'Lakewood']
['United_States', 'Washington', 'Marysville']
['United_States', 'Washington', 'Olympia']
['United_States', 'Washington', 'Pasco']
['United_States', 'Washington', 'Redmond']
['United_States', 'Washington', 'Renton']
['United_States', 'Washington', 'Richland']
['United_States', 'Washington', 'Sammamish']
['United_States', 'Washington', 'Seattle']
['United_States', 'Washington', 'Shoreline']
['United_States', 'Washington', 'South_Hill']
['United_States', 'Washington', 'Spokane']
['United_States', 'Washington', 'Spokane_Valley']
['United_States', 'Washington', 

In [13]:
zone_data

Unnamed: 0,Country,Division,Zone
0,United_States,Washington,Auburn
1,United_States,Washington,Bellevue
2,United_States,Washington,Bellingham
3,United_States,Washington,Burien
4,United_States,Washington,Everett
...,...,...,...
389,United_States,Texas,The_Woodlands
390,United_States,Texas,Tyler
391,United_States,Texas,Victoria
392,United_States,Texas,Waco


In [14]:
zone_data.Division.value_counts()

California    198
Texas          69
Washington     26
Arizona        22
Colorado       20
Utah           15
Oregon         13
Nevada         11
Idaho           7
New_Mexico      6
Montana         4
Wyoming         3
Name: Division, dtype: int64

In [37]:
zone_data[zone_data.Division=="California"]

Unnamed: 0,Country,Division,Zone
39,United_States,California,Alameda
40,United_States,California,Alhambra
41,United_States,California,Aliso_Viejo
42,United_States,California,Anaheim
43,United_States,California,Antioch
...,...,...,...
232,United_States,California,Woodland
233,United_States,California,Woodland_Hills
234,United_States,California,Yorba_Linda
235,United_States,California,Yuba_City


In [15]:
# Flat list of alternating entries: an id written as a string, followed by the
# Weather Underground daily-history URL for that id. A later cell reshapes it into
# (id, url) pairs, converts the ids to integers and joins them to zone_data on its
# index, so each id is a zone_data row label.
wunderground_url_map = [
"184","https://www.wunderground.com/history/daily/us/ca/san-francisco/KSFO",
"330","https://www.wunderground.com/history/daily/us/tx/austin/KAUS",
"17","https://www.wunderground.com/history/daily/us/wa/seattle/KSEA",
"291","https://www.wunderground.com/history/daily/us/az/phoenix/KPHX",
"308","https://www.wunderground.com/history/daily/us/co/denver/KDEN",
"270","https://www.wunderground.com/history/daily/us/ut/salt-lake-city/KSLC",


"26","https://www.wunderground.com/history/daily/us/or/albany/KSLE",
"27","https://www.wunderground.com/history/daily/us/or/beaverton/KPDX",
"28","https://www.wunderground.com/history/daily/us/or/bend/KRDM",
"29","https://www.wunderground.com/history/daily/us/or/corvallis/KSLE",
"30","https://www.wunderground.com/history/daily/us/or/eugene/KEUG",
"31","https://www.wunderground.com/history/daily/us/or/gresham/KPDX",
"32","https://www.wunderground.com/history/daily/us/or/hillsboro/KPDX",
"33","https://www.wunderground.com/history/daily/us/or/medford/KMFR",
"35","https://www.wunderground.com/history/daily/us/or/portland/KPDX",
"36","https://www.wunderground.com/history/daily/us/or/salem/KSLE",
"37","https://www.wunderground.com/history/daily/us/or/springfield/KEUG",
"38","https://www.wunderground.com/history/daily/us/or/tigard/KPDX",


"244","https://www.wunderground.com/history/daily/us/nv/carson-city/KRNO",
"246","https://www.wunderground.com/history/daily/us/nv/henderson/KBVU",
"247","https://www.wunderground.com/history/daily/us/nv/las-vegas/KVGT",
"249","https://www.wunderground.com/history/daily/us/nv/north-las-vegas/KVGT",
"251","https://www.wunderground.com/history/daily/us/nv/reno/KRNO",
"252","https://www.wunderground.com/history/daily/us/nv/sparks/KRNO",


"237","https://www.wunderground.com/history/daily/us/id/boise/KBOI",
"238","https://www.wunderground.com/history/daily/us/id/caldwell/KBOI",
"240","https://www.wunderground.com/history/daily/us/id/idaho-falls/KIDA",
"241","https://www.wunderground.com/history/daily/us/id/meridian/KBOI",
"242","https://www.wunderground.com/history/daily/us/id/nampa/KBOI",
"243","https://www.wunderground.com/history/daily/us/id/pocatello/KPIH",


"319","https://www.wunderground.com/history/daily/us/nm/albuquerque/KABQ",
"321","https://www.wunderground.com/history/daily/us/nm/las-cruces/KLRU",
"323","https://www.wunderground.com/history/daily/us/nm/rio-rancho/KABQ",
"324","https://www.wunderground.com/history/daily/us/nm/santa-fe/KSAF",

"255","https://www.wunderground.com/history/daily/us/mt/billings/KBIL",
"256","https://www.wunderground.com/history/daily/us/mt/great-falls/KGTF",
"257","https://www.wunderground.com/history/daily/us/mt/missoula/KMSO",


"259","https://www.wunderground.com/history/daily/us/wy/casper/KCPR",
"260","https://www.wunderground.com/history/daily/us/wy/cheyenne/KCYS"
]

In [16]:
# Fold the flat [id, url, id, url, ...] list into two columns, index the table by the
# id column, and convert the ids from strings to integers.
id_url_pairs = np.array(wunderground_url_map).reshape(-1, 2)
wunderground_url_map_df = pd.DataFrame(id_url_pairs)
wunderground_url_map_df = wunderground_url_map_df.set_index(0)
wunderground_url_map_df.index = wunderground_url_map_df.index.astype('int')

In [17]:
wunderground_url_map_df.join(zone_data)

Unnamed: 0_level_0,1,Country,Division,Zone
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
184,https://www.wunderground.com/history/daily/us/...,United_States,California,San_Francisco
330,https://www.wunderground.com/history/daily/us/...,United_States,Texas,Austin
17,https://www.wunderground.com/history/daily/us/...,United_States,Washington,Seattle
291,https://www.wunderground.com/history/daily/us/...,United_States,Arizona,Phoenix
308,https://www.wunderground.com/history/daily/us/...,United_States,Colorado,Denver
270,https://www.wunderground.com/history/daily/us/...,United_States,Utah,Salt_Lake_City
26,https://www.wunderground.com/history/daily/us/...,United_States,Oregon,Albany
27,https://www.wunderground.com/history/daily/us/...,United_States,Oregon,Beaverton
28,https://www.wunderground.com/history/daily/us/...,United_States,Oregon,Bend
29,https://www.wunderground.com/history/daily/us/...,United_States,Oregon,Corvallis


In [19]:
# Row labels of the chosen zones; used with .loc on both wunderground_url_map_df and zone_data.
selected_id = [184, 330, 17, 291, 308, 270,35,247,237,319,255,260]

In [36]:
# Take the URL column for the selected ids and keep only the last three
# path segments of each URL, re-joined with "/".
selected_urls = wunderground_url_map_df.loc[selected_id, 1]
url_segments = selected_urls.str.split('/')
url_strings = url_segments.str[-3:].str.join("/")

In [42]:
list(url_strings.values)

['ca/san-francisco/KSFO',
 'tx/austin/KAUS',
 'wa/seattle/KSEA',
 'az/phoenix/KPHX',
 'co/denver/KDEN',
 'ut/salt-lake-city/KSLC',
 'or/portland/KPDX',
 'nv/las-vegas/KVGT',
 'id/boise/KBOI',
 'nm/albuquerque/KABQ',
 'mt/billings/KBIL',
 'wy/cheyenne/KCYS']

In [1]:
# zone_data.loc[selected_id].to_csv('usa_west.csv',index=False)

NameError: name 'zone_data' is not defined

In [46]:
list(zone_data.loc[selected_id].Zone.values)

['San_Francisco',
 'Austin',
 'Seattle',
 'Phoenix',
 'Denver',
 'Salt_Lake_City',
 'Portland',
 'Las_Vegas',
 'Boise',
 'Albuquerque',
 'Billings',
 'Cheyenne']