# CalTrans-PeMS
### 1. Get Data

# Setup Notebook

!pip3 install mechanize==0.4.5
!pip install beautifulsoup4==4.9.0
!pip install cookiejar==0.0.3

In [1]:
# Import standard-library and 3rd party libraries
import os
import sys
import pandas as pd
import glob

# Local imports
sys.path.insert(0, os.path.dirname(os.path.abspath(os.getcwd())))
from pems.handler import PeMSHandler

# Configure Notebook
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
%load_ext autoreload
%autoreload 2

# Get PeMS Data

In [2]:
# Connect to PeMS
# Credentials must never be committed with the notebook: read them from the
# PEMS_USERNAME / PEMS_PASSWORD environment variables and, when a variable is
# unset or empty, fall back to an interactive prompt (the password is not echoed).
from getpass import getpass

pems_username = os.environ.get('PEMS_USERNAME') or input('PeMS username: ')
pems_password = os.environ.get('PEMS_PASSWORD') or getpass('PeMS password: ')
pems = PeMSHandler(username=pems_username, password=pems_password)

2023-01-04 14:48:31,032 [INFO    ] Requesting initial page...
2023-01-04 14:48:32,261 [INFO    ] Initial page opened.
2023-01-04 14:48:32,298 [INFO    ] Logging in...
2023-01-04 14:48:48,761 [INFO    ] Logged in.


In [3]:
# Show the label reference: a mapping from each file-type key to its readable label
pems.label_reference

{'fastrak_5min': 'FasTrak 5-Minute',
 'gn_link_5min': 'Link 5-Minute',
 'tmg_volume_day': 'Census Volume Day',
 'reid_hour': 'Re-ID Hour',
 'meta': 'Station Metadata',
 'station_aadt': 'Station AADT',
 'fastrak_locations': 'FasTrak Locations',
 'tmg_trucks_day': 'Census Trucks Day',
 'station_day': 'Station Day',
 'chp_incidents_day': 'CHP Incidents Day',
 'reid_raw': 'Re-ID Raw',
 'station_hour': 'Station Hour',
 'station_raw': 'Station Raw',
 'fastrak_day': 'FasTrak Day',
 'tmg_vclass_hour': 'Census V-Class Hour',
 'station_5min': 'Station 5-Minute',
 'fastrak_hour': 'FasTrak Hour',
 'reid_5min': 'Re-ID 5-Minute',
 'tmg_trucks_hour': 'Census Trucks Hour',
 'tmg_vclass_day': 'Census V-Class Day',
 'reid_locations': 'Re-ID Locations',
 'tmg_station_configs': 'Census Station Configurations',
 'chp_incidents_month': 'CHP Incidents Month'}

In [4]:
# List the available file types
pems.get_file_types()

['meta',
 'station_5min',
 'station_aadt',
 'station_day',
 'station_hour',
 'station_raw',
 'chp_incidents_day',
 'chp_incidents_month',
 'fastrak_5min',
 'fastrak_day',
 'fastrak_hour',
 'fastrak_locations',
 'gn_link_5min',
 'reid_5min',
 'reid_hour',
 'reid_locations',
 'reid_raw',
 'tmg_station_configs',
 'tmg_trucks_day',
 'tmg_trucks_hour',
 'tmg_vclass_day',
 'tmg_vclass_hour',
 'tmg_volume_day']

In [5]:
# List the districts available for a given file type
pems.get_districts(file_type='station_5min')

['3', '4', '5', '6', '7', '8', '10', '11', '12']

In [10]:
# Describe one (start_year, end_year, districts, file_types, months) query and
# ask the handler which files are available for it
file_query = dict(
    start_year=2021,
    end_year=2021,
    districts=['4'],
    file_types=['station_5min'],
    months=None,
)
files = pems.get_files(**file_query)

# Render the listing as a table
pd.DataFrame(files)

Unnamed: 0,file_type,district,year,month,file_name,file_id,megabites,download_url
0,station_5min,4,2021,January,d04_text_station_5min_2021_01_01.txt.gz,409489,27.5,/?download=409489&dnode=Clearinghouse
1,station_5min,4,2021,January,d04_text_station_5min_2021_01_02.txt.gz,409551,27.5,/?download=409551&dnode=Clearinghouse
2,station_5min,4,2021,January,d04_text_station_5min_2021_01_03.txt.gz,409597,27.4,/?download=409597&dnode=Clearinghouse
3,station_5min,4,2021,January,d04_text_station_5min_2021_01_04.txt.gz,409643,28.4,/?download=409643&dnode=Clearinghouse
4,station_5min,4,2021,January,d04_text_station_5min_2021_01_05.txt.gz,409689,28.6,/?download=409689&dnode=Clearinghouse
...,...,...,...,...,...,...,...,...
360,station_5min,4,2021,December,d04_text_station_5min_2021_12_27.txt.gz,430672,28.7,/?download=430672&dnode=Clearinghouse
361,station_5min,4,2021,December,d04_text_station_5min_2021_12_28.txt.gz,430714,28.8,/?download=430714&dnode=Clearinghouse
362,station_5min,4,2021,December,d04_text_station_5min_2021_12_29.txt.gz,430758,28.9,/?download=430758&dnode=Clearinghouse
363,station_5min,4,2021,December,d04_text_station_5min_2021_12_30.txt.gz,430800,28.4,/?download=430800&dnode=Clearinghouse


In [11]:
# Fetch the files matching the (start_year, end_year, districts, file_types, months) query
pems.download_files(
    start_year=2021,
    end_year=2021,
    districts=['4'],
    file_types=['station_5min'],
    months=None,
)

2023-01-04 15:02:00,418 [INFO    ] Start download, d04_text_station_5min_2021_01_01.txt.gz
2023-01-04 15:02:07,440 [INFO    ] Download completed
2023-01-04 15:02:12,449 [INFO    ] Start download, d04_text_station_5min_2021_01_02.txt.gz
2023-01-04 15:02:18,414 [INFO    ] Download completed
2023-01-04 15:02:23,423 [INFO    ] Start download, d04_text_station_5min_2021_01_03.txt.gz
2023-01-04 15:02:29,549 [INFO    ] Download completed
2023-01-04 15:02:34,557 [INFO    ] Start download, d04_text_station_5min_2021_01_04.txt.gz
2023-01-04 15:02:41,305 [INFO    ] Download completed
2023-01-04 15:02:46,313 [INFO    ] Start download, d04_text_station_5min_2021_01_05.txt.gz
2023-01-04 15:02:53,124 [INFO    ] Download completed
2023-01-04 15:02:58,133 [INFO    ] Start download, d04_text_station_5min_2021_01_06.txt.gz
2023-01-04 15:03:04,734 [INFO    ] Download completed
2023-01-04 15:03:09,742 [INFO    ] Start download, d04_text_station_5min_2021_01_07.txt.gz
2023-01-04 15:03:16,010 [INFO    ] Down

2023-01-04 15:12:34,601 [INFO    ] Download completed
2023-01-04 15:12:39,609 [INFO    ] Start download, d04_text_station_5min_2021_02_27.txt.gz
2023-01-04 15:12:43,911 [INFO    ] Download completed
2023-01-04 15:12:48,920 [INFO    ] Start download, d04_text_station_5min_2021_02_28.txt.gz
2023-01-04 15:12:55,334 [INFO    ] Download completed
2023-01-04 15:13:00,342 [INFO    ] Start download, d04_text_station_5min_2021_03_01.txt.gz
2023-01-04 15:13:04,606 [INFO    ] Download completed
2023-01-04 15:13:09,614 [INFO    ] Start download, d04_text_station_5min_2021_03_02.txt.gz
2023-01-04 15:13:16,462 [INFO    ] Download completed
2023-01-04 15:13:21,470 [INFO    ] Start download, d04_text_station_5min_2021_03_03.txt.gz
2023-01-04 15:13:25,742 [INFO    ] Download completed
2023-01-04 15:13:30,750 [INFO    ] Start download, d04_text_station_5min_2021_03_04.txt.gz
2023-01-04 15:13:36,430 [INFO    ] Download completed
2023-01-04 15:13:41,439 [INFO    ] Start download, d04_text_station_5min_202

2023-01-04 15:22:52,450 [INFO    ] Download completed
2023-01-04 15:22:57,458 [INFO    ] Start download, d04_text_station_5min_2021_04_25.txt.gz
2023-01-04 15:23:04,421 [INFO    ] Download completed
2023-01-04 15:23:09,429 [INFO    ] Start download, d04_text_station_5min_2021_04_26.txt.gz
2023-01-04 15:23:15,249 [INFO    ] Download completed
2023-01-04 15:23:20,257 [INFO    ] Start download, d04_text_station_5min_2021_04_27.txt.gz
2023-01-04 15:23:26,699 [INFO    ] Download completed
2023-01-04 15:23:31,707 [INFO    ] Start download, d04_text_station_5min_2021_04_28.txt.gz
2023-01-04 15:23:38,123 [INFO    ] Download completed
2023-01-04 15:23:43,131 [INFO    ] Start download, d04_text_station_5min_2021_04_29.txt.gz
2023-01-04 15:23:47,776 [INFO    ] Download completed
2023-01-04 15:23:52,779 [INFO    ] Start download, d04_text_station_5min_2021_04_30.txt.gz
2023-01-04 15:24:00,080 [INFO    ] Download completed
2023-01-04 15:24:05,088 [INFO    ] Start download, d04_text_station_5min_202

2023-01-04 15:33:12,820 [INFO    ] Download completed
2023-01-04 15:33:17,828 [INFO    ] Start download, d04_text_station_5min_2021_06_21.txt.gz
2023-01-04 15:33:26,734 [INFO    ] Download completed
2023-01-04 15:33:31,742 [INFO    ] Start download, d04_text_station_5min_2021_06_22.txt.gz
2023-01-04 15:33:36,207 [INFO    ] Download completed
2023-01-04 15:33:41,215 [INFO    ] Start download, d04_text_station_5min_2021_06_23.txt.gz
2023-01-04 15:33:45,707 [INFO    ] Download completed
2023-01-04 15:33:50,715 [INFO    ] Start download, d04_text_station_5min_2021_06_24.txt.gz
2023-01-04 15:33:55,327 [INFO    ] Download completed
2023-01-04 15:34:00,335 [INFO    ] Start download, d04_text_station_5min_2021_06_25.txt.gz
2023-01-04 15:34:06,500 [INFO    ] Download completed
2023-01-04 15:34:11,508 [INFO    ] Start download, d04_text_station_5min_2021_06_26.txt.gz
2023-01-04 15:34:18,181 [INFO    ] Download completed
2023-01-04 15:34:23,190 [INFO    ] Start download, d04_text_station_5min_202

2023-01-04 15:44:04,625 [INFO    ] Download completed
2023-01-04 15:44:09,633 [INFO    ] Start download, d04_text_station_5min_2021_08_17.txt.gz
2023-01-04 15:44:15,289 [INFO    ] Download completed
2023-01-04 15:44:20,297 [INFO    ] Start download, d04_text_station_5min_2021_08_18.txt.gz
2023-01-04 15:44:41,148 [INFO    ] Download completed
2023-01-04 15:44:46,155 [INFO    ] Start download, d04_text_station_5min_2021_08_19.txt.gz
2023-01-04 15:44:52,310 [INFO    ] Download completed
2023-01-04 15:44:57,318 [INFO    ] Start download, d04_text_station_5min_2021_08_20.txt.gz
2023-01-04 15:45:01,527 [INFO    ] Download completed
2023-01-04 15:45:06,535 [INFO    ] Start download, d04_text_station_5min_2021_08_21.txt.gz
2023-01-04 15:45:13,564 [INFO    ] Download completed
2023-01-04 15:45:18,567 [INFO    ] Start download, d04_text_station_5min_2021_08_22.txt.gz
2023-01-04 15:45:25,153 [INFO    ] Download completed
2023-01-04 15:45:30,161 [INFO    ] Start download, d04_text_station_5min_202

2023-01-04 15:58:47,903 [INFO    ] Download completed
2023-01-04 15:58:52,912 [INFO    ] Start download, d04_text_station_5min_2021_10_13.txt.gz
2023-01-04 15:59:01,270 [INFO    ] Download completed
2023-01-04 15:59:06,278 [INFO    ] Start download, d04_text_station_5min_2021_10_14.txt.gz
2023-01-04 15:59:14,703 [INFO    ] Download completed
2023-01-04 15:59:19,711 [INFO    ] Start download, d04_text_station_5min_2021_10_15.txt.gz
2023-01-04 15:59:27,436 [INFO    ] Download completed
2023-01-04 15:59:32,443 [INFO    ] Start download, d04_text_station_5min_2021_10_16.txt.gz
2023-01-04 15:59:40,123 [INFO    ] Download completed
2023-01-04 15:59:45,131 [INFO    ] Start download, d04_text_station_5min_2021_10_17.txt.gz
2023-01-04 15:59:53,683 [INFO    ] Download completed
2023-01-04 15:59:58,691 [INFO    ] Start download, d04_text_station_5min_2021_10_18.txt.gz
2023-01-04 16:00:05,878 [INFO    ] Download completed
2023-01-04 16:00:10,886 [INFO    ] Start download, d04_text_station_5min_202

2023-01-04 16:11:15,115 [INFO    ] Download completed
2023-01-04 16:11:20,124 [INFO    ] Start download, d04_text_station_5min_2021_12_09.txt.gz
2023-01-04 16:11:28,007 [INFO    ] Download completed
2023-01-04 16:11:33,015 [INFO    ] Start download, d04_text_station_5min_2021_12_10.txt.gz
2023-01-04 16:11:41,808 [INFO    ] Download completed
2023-01-04 16:11:46,816 [INFO    ] Start download, d04_text_station_5min_2021_12_11.txt.gz
2023-01-04 16:11:55,471 [INFO    ] Download completed
2023-01-04 16:12:00,479 [INFO    ] Start download, d04_text_station_5min_2021_12_12.txt.gz
2023-01-04 16:12:08,827 [INFO    ] Download completed
2023-01-04 16:12:13,835 [INFO    ] Start download, d04_text_station_5min_2021_12_13.txt.gz
2023-01-04 16:12:21,591 [INFO    ] Download completed
2023-01-04 16:12:26,599 [INFO    ] Start download, d04_text_station_5min_2021_12_14.txt.gz
2023-01-04 16:12:35,147 [INFO    ] Download completed
2023-01-04 16:12:40,155 [INFO    ] Start download, d04_text_station_5min_202

In [None]:
# Directory containing the station 5-minute archives (*.gz).
# Set the PEMS_DATA_DIR environment variable to point at your own copy instead
# of editing this cell; the original location is kept as the default.
path = os.environ.get('PEMS_DATA_DIR', r'/storage/home/amd7293/caltrans-pems/data/dist_4/2021')

# glob returns matches in arbitrary order; sorting makes the row order of the
# combined frame deterministic (the file names embed the date as YYYY_MM_DD).
all_files = sorted(glob.glob(os.path.join(path, "*.gz")))
if not all_files:
    # Without this guard pd.concat fails with an unhelpful "No objects to concatenate".
    raise FileNotFoundError(f"No .gz files found in {path!r}; check the path or set PEMS_DATA_DIR.")

# Names for the first 12 fields of every record; any further fields in the
# files are not used by this notebook.
columns = ["timestamp", "id", "district", "freeway_no", "direction", "lane_type",
           "station_length", "samples", "pct_obs", "total_flow", "avg_occ", "avg_speed"]

# Only the first len(columns) fields are kept, so parse only those instead of
# loading every field of every file and slicing afterwards.
wanted_fields = list(range(len(columns)))
frames = [
    pd.read_csv(filename, index_col=None, compression='gzip', header=None,
                sep=',', quotechar='"', usecols=wanted_fields)
    for filename in all_files
]

traffic_station_df = pd.concat(frames, axis=0, ignore_index=True)
traffic_station_df.columns = columns

# Timestamp	

The date and time of the beginning of the summary interval. For example, a time of 08:00:00 indicates that the aggregate(s) contain measurements collected between 08:00:00 and 08:04:59. Note that second values are always 0 for five-minute aggregations. The format is MM/DD/YYYY HH24:MI:SS.	 

# Station	

Unique station identifier. Use this value to cross-reference with Metadata files.

# District	

District #	 

# Freeway #	

Freeway #	

# Direction of Travel

N | S | E | W	 

# Lane Type	

A string indicating the type of lane. Possible values (and their meanings) are:
CD (Coll/Dist)
CH (Conventional Highway)
FF (Fwy-Fwy connector)
FR (Off Ramp)
HV (HOV)
ML (Mainline)
OR (On Ramp)
 
Station Length	Segment length covered by the station in miles/km.	 
Samples	Total number of samples received for all lanes.	 
% Observed	Percentage of individual lane points at this location that were observed (e.g. not imputed).	%
Total Flow	Sum of flows over the 5-minute period across all lanes. Note that the basic 5-minute rollup normalizes flow by the number of good samples received from the controller.	Veh/5-min
Avg Occupancy	Average occupancy across all lanes over the 5-minute period expressed as a decimal number between 0 and 1.	%
Avg Speed	Flow-weighted average speed over the 5-minute period across all lanes. If flow is 0, mathematical average of 5-minute station speeds.	Mph
Lane N Samples	Number of good samples received for lane N. N ranges from 1 to the number of lanes at the location.	 
Lane N Flow	Total flow for lane N over the 5-minute period normalized by the number of good samples.	Veh/5-min
Lane N Avg Occ	Average occupancy for lane N expressed as a decimal number between 0 and 1. N ranges from 1 to the number of lanes at the location.	%
Lane N Avg Speed	Flow-weighted average of lane N speeds. If flow is 0, mathematical average of 5-minute lane speeds. N ranges from 1 to the number of lanes at the location.	Mph
Lane N Observed	1 indicates observed data, 0 indicates imputed.

In [None]:
# Persist the combined station data as CSV (without the positional index).
output_csv = '/storage/home/amd7293/caltrans-pems/CSV/traffic_station_df_d4_2021.csv'
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(os.path.dirname(output_csv), exist_ok=True)
traffic_station_df.to_csv(output_csv, index=False)

# A notebook only renders the cell's *last* expression, so the preview goes
# here; show a few rows rather than the whole frame.
traffic_station_df.head()