# This file works for selecting 2 stations with the smallest missing value in each city.
---

# Seattle

In [2]:
import pandas as pd

def read_and_combine(*filepaths):
    data_frames = [pd.read_csv(filepath) for filepath in filepaths]
    combined_data = pd.concat(data_frames)
    return combined_data

filepaths = [
    'seattle0304.csv',
    'seattle0506.csv',
    'seattle0708.csv',
    'seattle0910.csv',
    'seattle1112.csv',
    'seattle1314.csv',
    'seattle1516.csv'
]

data = read_and_combine(*filepaths)
data

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,DAPR,DAPR_ATTRIBUTES,MDPR,MDPR_ATTRIBUTES,PRCP,PRCP_ATTRIBUTES,SNOW,SNOW_ATTRIBUTES,SNWD,SNWD_ATTRIBUTES
0,USC00456295,"PALMER 3 ESE, WA US",47.308890,-121.855560,268.8,2003/1/1,,,,,0.01,",,0,0800",,,,
1,USC00456295,"PALMER 3 ESE, WA US",47.308890,-121.855560,268.8,2003/1/2,,,,,1.38,",,0,0800",,,,
2,USC00456295,"PALMER 3 ESE, WA US",47.308890,-121.855560,268.8,2003/1/3,,,,,0.49,",,0,0800",,,,
3,USC00456295,"PALMER 3 ESE, WA US",47.308890,-121.855560,268.8,2003/1/4,,,,,0.33,",,0,0800",,,,
4,USC00456295,"PALMER 3 ESE, WA US",47.308890,-121.855560,268.8,2003/1/5,,,,,0.79,",,0,0800",,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72592,US1WASN0027,"EVERETT 2.4 NNW, WA US",47.996751,-122.209573,31.7,2016-12-27,,,,,0.14,",,N",,,,
72593,US1WASN0027,"EVERETT 2.4 NNW, WA US",47.996751,-122.209573,31.7,2016-12-28,,,,,0.03,",,N",,,,
72594,US1WASN0027,"EVERETT 2.4 NNW, WA US",47.996751,-122.209573,31.7,2016-12-29,,,,,0.00,",,N",0.0,",,N",,
72595,US1WASN0027,"EVERETT 2.4 NNW, WA US",47.996751,-122.209573,31.7,2016-12-30,,,,,0.40,",,N",,,,


In [3]:
def unify_date_format(data, date_column='DATE'):
    # 将'date'列转换为datetime格式，错误的日期格式会被设为NaT
    data[date_column] = pd.to_datetime(data[date_column], errors='coerce')
    # 将NaT（不是日期的值）删除或者替换
    data = data.dropna(subset=[date_column])
    # 按照日期列从远及近的顺序排列
    data = data.sort_values(by=date_column)
    # 再将'date'列转换为字符串，并指定格式为'YYYY-MM-DD'
    data[date_column] = data[date_column].apply(lambda x: x.strftime('%Y-%m-%d'))
    return data

data = unify_date_format(data)
data

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,DAPR,DAPR_ATTRIBUTES,MDPR,MDPR_ATTRIBUTES,PRCP,PRCP_ATTRIBUTES,SNOW,SNOW_ATTRIBUTES,SNWD,SNWD_ATTRIBUTES
0,USC00456295,"PALMER 3 ESE, WA US",47.308890,-121.855560,268.8,2003-01-01,,,,,0.01,",,0,0800",,,,
3339,USC00459021,"WAUNA 3 W, WA US",47.372500,-122.702700,5.2,2003-01-01,,,,,0.07,",,0,0800",,,,
6994,USW00094248,"RENTON MUNICIPAL AIRPORT, WA US",47.495130,-122.214420,5.5,2003-01-01,,,,,0.92,",,W",,,,
2608,USW00094274,"TACOMA NARROWS AIRPORT, WA US",47.267440,-122.576230,88.4,2003-01-01,,,,,1.12,",,W",,,,
10579,USC00454169,"KENT, WA US",47.417350,-122.243300,8.8,2003-01-01,,,,,0.76,",,0,1700",,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55222,US1WAKG0007,"NORTH BEND 2.8 SE, WA US",47.468918,-121.744997,157.6,2016-12-31,,,,,0.00,",,N",0.0,",,N",,
13222,US1WAKG0052,"SEATTLE 5.6 SSW, WA US",47.543167,-122.381017,113.1,2016-12-31,,,,,0.01,",,N",,,,
64165,USC00454486,"LANDSBURG, WA US",47.376600,-121.961300,163.1,2016-12-31,,,,,0.00,"T,,7,2400",,,,
23054,US1WAKG0042,"NEWPORT HILLS 1.9 SSE, WA US",47.521080,-122.161270,104.2,2016-12-31,,,,,0.02,",,N",,,,


In [4]:
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
data

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,ELEVATION,DATE,PRCP,PRCP_ATTRIBUTES
0,USC00456295,"PALMER 3 ESE, WA US",47.308890,-121.855560,268.8,2003-01-01,0.01,",,0,0800"
3339,USC00459021,"WAUNA 3 W, WA US",47.372500,-122.702700,5.2,2003-01-01,0.07,",,0,0800"
6994,USW00094248,"RENTON MUNICIPAL AIRPORT, WA US",47.495130,-122.214420,5.5,2003-01-01,0.92,",,W"
2608,USW00094274,"TACOMA NARROWS AIRPORT, WA US",47.267440,-122.576230,88.4,2003-01-01,1.12,",,W"
10579,USC00454169,"KENT, WA US",47.417350,-122.243300,8.8,2003-01-01,0.76,",,0,1700"
...,...,...,...,...,...,...,...,...
55222,US1WAKG0007,"NORTH BEND 2.8 SE, WA US",47.468918,-121.744997,157.6,2016-12-31,0.00,",,N"
13222,US1WAKG0052,"SEATTLE 5.6 SSW, WA US",47.543167,-122.381017,113.1,2016-12-31,0.01,",,N"
64165,USC00454486,"LANDSBURG, WA US",47.376600,-121.961300,163.1,2016-12-31,0.00,"T,,7,2400"
23054,US1WAKG0042,"NEWPORT HILLS 1.9 SSE, WA US",47.521080,-122.161270,104.2,2016-12-31,0.02,",,N"


In [5]:
# save the data to csv file
data.to_csv('seattle_PRCP.csv',index=False)

import pandas as pd

def find_min_missing_stations(filepath, column='PRCP', station_column='STATION'):
    data = pd.read_csv(filepath)
    stations = data[station_column].unique()

    missing_values = {}
    for station in stations:
        station_data = data[data[station_column] == station]
        missing_values[station] = station_data[column].isnull().sum()

    print(f"Total missing value: {sum(missing_values.values())}")

    # 排序并选取缺失值最小的两个站点
    sorted_stations = sorted(missing_values.items(), key=lambda x: x[1])
    for station, missing_value in sorted_stations[:2]:
        print(f"Station: {station}, Missing_value: {missing_value}")

    return [x[0] for x in sorted_stations[:2]]

file_path = 'seattle_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)


Total missing value: 7155
Station: US1WAPR0014, Missing_value: 0
Station: US1WAKP0013, Missing_value: 0


---

# Portland OR

In [6]:
filepaths = [
    'portland OR0307.csv',
    'portland OR0811.csv',
    'portland OR1216.csv'
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('portlandOR_PRCP.csv',index=False)

file_path = 'portlandOR_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 3580
Station: USW00024229, Missing_value: 0
Station: US1ORCC0001, Missing_value: 0


---

# Los Angeles, CA

In [7]:
filepaths = [
    'losangeles0307.csv',
    'losangeles0812.csv',
    'losangeles1316.csv'
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('losangeles_PRCP.csv',index=False)

file_path = 'losangeles_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 64052
Station: USW00023152, Missing_value: 0
Station: USC00049785, Missing_value: 0


---

# Boise

In [8]:
filepaths = [
    'Boise.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('Boise_PRCP.csv',index=False)

file_path = 'Boise_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 4966
Station: USW00024131, Missing_value: 0
Station: USS0016F02S, Missing_value: 0


---

# Las Vegas, NV

In [9]:
filepaths = [
    'las vegas.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('las vegas_PRCP.csv',index=False)

file_path = 'las vegas_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]


Total missing value: 21509
Station: USC00264439, Missing_value: 0
Station: USW00023169, Missing_value: 0


  data = pd.read_csv(filepath)


---

# Billings

In [10]:
filepaths = [
    'billings.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('billings_PRCP.csv',index=False)

file_path = 'billings_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]


Total missing value: 4709
Station: USW00024033, Missing_value: 0
Station: USC00240801, Missing_value: 0


---

# Cheyenne

In [11]:
filepaths = [
    'cheyenne0310.csv',
    'cheyenne1116.csv'
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('cheyenne_PRCP.csv',index=False)

file_path = 'cheyenne_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data = pd.read_csv(filepath)


Total missing value: 1843
Station: USW00024018, Missing_value: 0
Station: USC00481676, Missing_value: 0


---

# SaltLakeCity

In [12]:
filepaths = [
    'saltlakecity0310.csv',
    'saltlakecity1116.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('saltlakecity_PRCP.csv',index=False)

file_path = 'saltlakecity_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data = pd.read_csv(filepath)


Total missing value: 1177
Station: USW00024127, Missing_value: 0
Station: USS0011J42S, Missing_value: 0


---

# Phoenix

In [13]:
filepaths = [
    'phoenix0304.csv',
    'phoenix0506.csv',
    'phoenix0708.csv',
    'phoenix0910.csv',
    'phoenix1112.csv',
    'phoenix1314.csv',
    'phoenix1516.csv'
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('phoenix_PRCP.csv',index=False)

file_path = 'phoenix_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 1655
Station: USC00028112, Missing_value: 0
Station: USW00023183, Missing_value: 0


---

# Denver

In [14]:
filepaths = [
    'denver2003.csv',
    'denver2004.csv',
    'denver2005.csv',
    'denver2006.csv',
    'denver2007.csv',
    'denver2008.csv',
    'denver2009.csv',
    'denver2010.csv',
    'denver2011.csv',
    'denver2012.csv',
    'denver2013.csv',
    'denver2014.csv',
    'denver2015.csv',
    'denver2016.csv'
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('denver_PRCP.csv',index=False)

file_path = 'denver_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data = pd.read_csv(filepath)


Total missing value: 36802
Station: US1COJF0092, Missing_value: 0
Station: US1COAR0068, Missing_value: 0


---

# Albuquerque, NM

In [15]:
filepaths = [
    'albuquerque0304.csv',
    'albuquerque0506.csv',
    'albuquerque0708.csv',
    'albuquerque0910.csv',
    'albuquerque1112.csv',
    'albuquerque1314.csv',
    'albuquerque1516.csv'
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('albuquerque_PRCP.csv',index=False)

file_path = 'albuquerque_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data = pd.read_csv(filepath)


Total missing value: 3696
Station: USC00290231, Missing_value: 0
Station: USC00292753, Missing_value: 0


---

# Fargo

In [16]:
filepaths = [
    'fargo.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('fargo_PRCP.csv',index=False)

file_path = 'fargo_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]


Total missing value: 298
Station: USW00014914, Missing_value: 0
Station: USC00215586, Missing_value: 0


---

# Sioux Falls

In [17]:
filepaths = [
    'sioux falls.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('sioux falls_PRCP.csv',index=False)

file_path = 'sioux falls_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 560
Station: USW00014944, Missing_value: 0
Station: US1SDMH0020, Missing_value: 0


---

# Omaha

In [18]:
filepaths = [
    'omaha0309.csv',
    'omaha1016.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('omaha_PRCP.csv',index=False)

file_path = 'omaha_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data = pd.read_csv(filepath)


Total missing value: 715
Station: USW00014942, Missing_value: 0
Station: USC00253467, Missing_value: 0


---

# Wichita

In [19]:
filepaths = [
    'wichita0310.csv',
    'wichita1116.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('wichita_PRCP.csv',index=False)

file_path = 'wichita_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 696
Station: USW00003928, Missing_value: 0
Station: US1KSSG0002, Missing_value: 0


  data = pd.read_csv(filepath)


---

# Oklahoma City

In [20]:
filepaths = [
    'oklahoma city0306.csv',
    'oklahoma city0710.csv',
    'oklahoma city1114.csv',
    'oklahoma city1516.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('oklahoma city_PRCP.csv',index=False)

file_path = 'oklahoma city_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 3070
Station: USW00013967, Missing_value: 0
Station: USC00347068, Missing_value: 0


---

# Dallas

In [21]:
filepaths = [
    'dallas.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('dallas_PRCP.csv',index=False)

file_path = 'dallas_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 182
Station: USW00013960, Missing_value: 0
Station: USC00417588, Missing_value: 0


---

# Minneapolis, MN, USA

In [22]:
filepaths = [
    'minneapolis0305.csv',
    'minneapolis1214.csv',
    'minneapolis1516.csv'
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('minneapolis_PRCP.csv',index=False)

file_path = 'minneapolis_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data = pd.read_csv(filepath)


Total missing value: 3562
Station: USC00218450, Missing_value: 0
Station: USC00211448, Missing_value: 0


---

# Des moines

In [23]:
filepaths = [
    'des moines0310.csv',
    'des moines1116.csv'
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('des moines_PRCP.csv',index=False)

file_path = 'des moines_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 679
Station: US1IAPK0001, Missing_value: 0
Station: US1IAPK0042, Missing_value: 0


---

# St.Louis

In [24]:
filepaths = [
    'st.louis0310.csv',
    'st.louis1116.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('st.louis_PRCP.csv',index=False)

file_path = 'st.louis_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 1345
Station: USW00013994, Missing_value: 0
Station: US1ILSC0009, Missing_value: 0


---

# Little Rock, AR, USA

In [25]:
filepaths = [
    'littlerock.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('littlerock_PRCP.csv',index=False)

file_path = 'littlerock_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 576
Station: USW00003952, Missing_value: 0
Station: USC00034010, Missing_value: 0


---

# New Orleans, LA, USA

In [26]:
filepaths = [
    'neworleans.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('neworleans_PRCP.csv',index=False)

file_path = 'neworleans_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 1178
Station: USW00012916, Missing_value: 0
Station: US1LAJF0001, Missing_value: 0


---


# Milwaukee

In [28]:
filepaths = [
    'milwaukee0306.csv',
    'milwaukee0710.csv',
    'milwaukee1113.csv',
    'milwaukee1416.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('milwaukee_PRCP.csv',index=False)

file_path = 'milwaukee_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data = pd.read_csv(filepath)


Total missing value: 1233
Station: USC00479050, Missing_value: 0
Station: USW00014839, Missing_value: 0


---

# Chicago

In [29]:
filepaths = [
    'chicago0304.csv',
    'chicago0506.csv',
    'chicago0708.csv',
    'chicago0910.csv',
    'chicago1112.csv',
    'chicago1314.csv',
    'chicago1516.csv'
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('chicago_PRCP.csv',index=False)

file_path = 'chicago_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data = pd.read_csv(filepath)


Total missing value: 11703
Station: USC00111577, Missing_value: 0
Station: USW00094846, Missing_value: 0


---

# Jackson

In [30]:
filepaths = [
    'jackson.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('jackson_PRCP.csv',index=False)

file_path = 'jackson_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 675
Station: USW00003940, Missing_value: 0
Station: US1MSRN0013, Missing_value: 0


---

# Detroit

In [31]:
filepaths = [
    'detroit0306.csv',
    'detroit0710.csv',
    'detroit1113.csv',
    'detroit1416.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('detroit_PRCP.csv',index=False)

file_path = 'detroit_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data = pd.read_csv(filepath)


Total missing value: 7679
Station: CA006139525, Missing_value: 0
Station: USW00094847, Missing_value: 0


---

# Indianapolis, IN, USA

In [32]:
filepaths = [
    'indianapolis0305.csv',
    'indianapolis0608.csv',
    'indianapolis0911.csv',
    'indianapolis1214.csv',
    'indianapolis1516.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('indianapolis_PRCP.csv',index=False)

file_path = 'indianapolis_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data = pd.read_csv(filepath)


Total missing value: 4347
Station: USW00053866, Missing_value: 0
Station: USW00093819, Missing_value: 0


---

# Louisville

In [33]:
filepaths = [
    'louisville0310.csv',
    'louisville1116.csv'
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('louisville_PRCP.csv',index=False)

file_path = 'louisville_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 760
Station: USW00093821, Missing_value: 0
Station: US1INCK0014, Missing_value: 0


---

# Nashville

In [34]:
filepaths = [
    'nashville0305.csv',
    'nashville0608.csv',
    'nashville0911.csv',
    'nashville1214.csv',
    'nashville1516.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('nashville_PRCP.csv',index=False)

file_path = 'nashville_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data = pd.read_csv(filepath)


Total missing value: 12055
Station: USW00013897, Missing_value: 0
Station: US1TNWL0002, Missing_value: 0


---

# Birmingham

In [35]:
filepaths = [
    'birmingham.csv'
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('birmingham_PRCP.csv',index=False)

file_path = 'birmingham_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 889
Station: USW00013876, Missing_value: 0
Station: US1ALSH0004, Missing_value: 0


---

# Cincinnati

In [36]:
filepaths = [
    'cincinnati0310.csv',
    'cincinnati116.csv'

]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('cincinnati_PRCP.csv',index=False)

file_path = 'cincinnati_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 727
Station: USW00093814, Missing_value: 0
Station: US1OHHM0003, Missing_value: 0


---

# Huntington

In [37]:
filepaths = [
    'huntington.csv'
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('huntington_PRCP.csv',index=False)

file_path = 'huntington_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]


Total missing value: 1678
Station: USW00003860, Missing_value: 0
Station: US1KYBD0001, Missing_value: 0


---

# Atlanta

In [38]:
filepaths = [
    'atlanta0310.csv',
    'atlanta1116.csv'
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('atlanta_PRCP.csv',index=False)

file_path = 'atlanta_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 1590
Station: USW00013874, Missing_value: 0
Station: US1GAGW0005, Missing_value: 0


---

# Portland ME

In [39]:
filepaths = [
    'portland ME.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('portland ME_PRCP.csv',index=False)

file_path = 'portland ME_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 792
Station: USW00014764, Missing_value: 0
Station: US1MECM0004, Missing_value: 0


---

# Burlington

In [40]:
filepaths = [
    'burlington.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('burlington_PRCP.csv',index=False)

file_path = 'burlington_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 6670
Station: USW00014742, Missing_value: 0
Station: USC00437607, Missing_value: 0


---

# Nashua

In [41]:
filepaths = [
    'nashua0310.csv',
    'nashua116.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('nashua_PRCP.csv',index=False)

file_path = 'nashua_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 823
Station: USC00272302, Missing_value: 0
Station: US1MAMD0004, Missing_value: 0


---

# Boston

In [42]:
filepaths = [
    'boston0305.csv',
    'boston0608.csv',
    'boston0911.csv',
    'boston1214.csv',
    'boston1516.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('boston_PRCP.csv',index=False)

file_path = 'boston_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data = pd.read_csv(filepath)


Total missing value: 3894
Station: USC00190860, Missing_value: 0
Station: USC00190535, Missing_value: 0


---

# Providence

In [43]:
filepaths = [
    'providence0309.csv',
    'providence1016.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('providence_PRCP.csv',index=False)

file_path = 'providence_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]


Total missing value: 885
Station: USW00014765, Missing_value: 0
Station: US1RIWS0004, Missing_value: 0


---

# New York

In [44]:
filepaths = [
    'newyork0305.csv',
    'newyork0608.csv',
    'newyork0911.csv',
    'newyork1214.csv',
    'newyork1516.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('newyork_PRCP.csv',index=False)

file_path = 'newyork_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data = pd.read_csv(filepath)


Total missing value: 4917
Station: USC00305796, Missing_value: 0
Station: USW00014732, Missing_value: 0


---

# Bridgeport

In [45]:
filepaths = [
    'bridgeport.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('bridgeport_PRCP.csv',index=False)

file_path = 'bridgeport_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 102
Station: USW00094702, Missing_value: 0
Station: USC00067157, Missing_value: 0


---

# Philadelphia

In [46]:
filepaths = [
    'philadelphia0304.csv',
    'philadelphia0506.csv',
    'philadelphia0708.csv',
    'philadelphia0910.csv',
    'philadelphia1112.csv',
    'philadelphia1314.csv',
    'philadelphia1516.csv'
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('philadelphia_PRCP.csv',index=False)

file_path = 'philadelphia_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data = pd.read_csv(filepath)


Total missing value: 5329
Station: USW00013781, Missing_value: 0
Station: US1PABK0015, Missing_value: 0


---

# Trenton

In [47]:
filepaths = [
    'trenton0310.csv',
    'trenton1116.csv'
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('trenton_PRCP.csv',index=False)

file_path = 'trenton_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 3923
Station: USC00288880, Missing_value: 0
Station: USW00014792, Missing_value: 0


---

# Dover

In [48]:
filepaths = [
    'dover.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('dover_PRCP.csv',index=False)

file_path = 'dover_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 272
Station: USC00078508, Missing_value: 0
Station: US1DEKN0015, Missing_value: 0


---

# Baltimore

In [51]:
filepaths = [
    'baltimore0305.csv',
    'baltimore0608.csv',
    'baltimore0911.csv',
    'baltimore1214.csv',
    'baltimore1516.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('baltimore_PRCP.csv',index=False)

file_path = 'baltimore_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data = pd.read_csv(filepath)


Total missing value: 4892
Station: USW00093721, Missing_value: 0
Station: USC00185934, Missing_value: 0


---

# Virginia Beach

In [50]:
filepaths = [
    'virginiabeach.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('virginiabeach_PRCP.csv',index=False)

file_path = 'virginiabeach_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 6404
Station: USC00446147, Missing_value: 0
Station: US1VAVBC002, Missing_value: 0


---

# Charlotte

In [52]:
filepaths = [
    'charlotte0306.csv',
    'charlotte0710.csv',
    'charlotte1113.csv',
    'charlotte1416.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('charlotte_PRCP.csv',index=False)

file_path = 'charlotte_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]
  data = pd.read_csv(filepath)


Total missing value: 7791
Station: USW00013881, Missing_value: 0
Station: US1NCLN0008, Missing_value: 0


---

# Charleston

In [53]:
filepaths = [
    'charleston0308.csv',
    'charleston0913.csv',
    'charleston1416.csv'
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('charleston_PRCP.csv',index=False)

file_path = 'charleston_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

Total missing value: 2022
Station: USW00013880, Missing_value: 0
Station: USW00013782, Missing_value: 0


---

# Miami

In [54]:
filepaths = [
    'miami0310.csv',
    'miami1116.csv',
]
data = read_and_combine(*filepaths)
data = unify_date_format(data)
data.drop(['DAPR','DAPR_ATTRIBUTES','MDPR','MDPR_ATTRIBUTES','SNOW','SNOW_ATTRIBUTES','SNWD','SNWD_ATTRIBUTES'],axis=1,inplace=True)
# save the data to csv file
data.to_csv('miami_PRCP.csv',index=False)

file_path = 'miami_PRCP.csv'
smallest_missing_stations = find_min_missing_stations(file_path)

  data_frames = [pd.read_csv(filepath) for filepath in filepaths]


Total missing value: 1124
Station: USW00012839, Missing_value: 0
Station: USC00085667, Missing_value: 0
