In [185]:
using CSV, DataFrames, Dates
using Tar, CodecZlib

In [197]:
# Fetch the SCEDC catalog archive and save it in the working directory
url = "https://service.scedc.caltech.edu/ftp/catalogs/SCEC_DC/SCEDC_catalogs.tar.gz"
archive_path = "./california.tar.gz"
download(url, archive_path)

# Stream the archive through a gzip decompressor and unpack the tarball into "output"
open(GzipDecompressorStream, archive_path) do gz_stream
    Tar.extract(gz_stream, "output")
end;

In [187]:
# Declare types of the needed columns
datetime = Vector{String}()
latitude, longitude, depth =  Vector{Float64}(), Vector{Float64}(), Vector{Float64}()
magnitude =  Vector{Float64}()
magnitude_type = Vector{String}()
event_type = Vector{String}();

In [188]:
# Read each yearly catalog file and append its events to the column vectors.
for year in 1932:2023
    catalog_path = string("./output/SCEC_DC/", year, ".catalog")
    open(catalog_path) do io
        # Discard the first 10 lines of the file before the event records
        foreach(_ -> readline(io), 1:10)
        # Records run until the first empty line; whatever follows it is not read
        for record in eachline(io)
            isempty(record) && break
            # Fields are fixed-width: slice by character position, then trim padding
            push!(datetime, strip(record[1:22]))
            push!(event_type, strip(record[24:25]))
            push!(magnitude, parse(Float64, strip(record[30:33])))
            push!(magnitude_type, strip(record[35:37]))
            push!(latitude, parse(Float64, strip(record[40:45])))
            push!(longitude, parse(Float64, strip(record[47:54])))
            push!(depth, parse(Float64, strip(record[56:60])))
        end
    end
end

# Repair timestamps whose seconds field is "60", which DateTime rejects.
#
# Timestamps are laid out as "yyyy/mm/dd HH:MM:SS.ss", so the seconds occupy
# characters 18-19. The format is built right here so the check does not rely on a
# variable defined further down: with an undefined format every parse would throw,
# and a blanket `catch` would then rewrite the seconds of *every* timestamp.
catalog_dateformat = dateformat"yyyy/mm/dd HH:MM:SS.ss"
for i in eachindex(datetime)
    stamp = datetime[i]
    # tryparse yields `nothing` for an invalid timestamp instead of throwing, so
    # unrelated errors can no longer be mistaken for a bad seconds field
    tryparse(DateTime, stamp, catalog_dateformat) === nothing || continue

    chars = collect(stamp)
    if length(chars) >= 19 && chars[18] == '6' && chars[19] == '0'
        # Seconds written as "60" are not a valid time; rewrite them as "50".
        # NOTE(review): this moves the event ~10 s earlier; rolling over into the
        # next minute would be more faithful -- confirm before changing the data.
        chars[18] = '5'
        datetime[i] = join(chars)
    else
        # Not the known "60 seconds" problem: leave the value alone and make it visible
        @warn "Unparseable catalog timestamp left unchanged" index = i value = stamp
    end
end


# Assemble the parsed columns into a table; the pair order fixes the column order
df = DataFrame(
    :Datetime => datetime,
    :Latitude => latitude,
    :Longitude => longitude,
    :Depth => depth,
    :Magnitude => magnitude,
    :Event_Type => event_type,
    :Magnitude_Type => magnitude_type,
);

In [189]:
# Turn the textual timestamps into DateTime values using the catalog's layout
dateformat = DateFormat("yyyy/mm/dd HH:MM:SS.ss")
df.Datetime = map(stamp -> DateTime(stamp, dateformat), df.Datetime);

In [191]:
# Keep only rows tagged "eq" that have a strictly positive magnitude
is_earthquake = df.Event_Type .== "eq"
has_positive_magnitude = df.Magnitude .> 0.0
california = df[is_earthquake .& has_positive_magnitude, :];

In [192]:
# Display the filtered table as the cell result
california

Row,Datetime,Latitude,Longitude,Depth,Magnitude,Event_Type,Magnitude_Type
Unnamed: 0_level_1,DateTime,Float64,Float64,Float64,Float64,String,String
1,1932-01-02T16:42:43.680,33.903,-117.645,6.0,2.73,eq,l
2,1932-01-03T17:58:10.010,32.0,-116.0,6.0,3.0,eq,h
3,1932-01-04T21:30:00.960,33.771,-117.494,6.0,2.0,eq,h
4,1932-01-05T02:37:27.960,33.559,-118.442,6.0,1.5,eq,h
5,1932-01-06T08:08:38.620,34.143,-117.475,6.0,2.0,eq,h
6,1932-01-07T02:29:22.370,33.992,-118.194,6.0,1.0,eq,h
7,1932-01-07T05:39:23.730,34.094,-117.263,6.0,1.0,eq,h
8,1932-01-07T14:55:43.920,32.0,-116.0,6.0,3.0,eq,h
9,1932-01-07T21:25:55.850,34.775,-118.814,6.0,2.0,eq,h
10,1932-01-10T10:44:54.530,33.857,-117.537,6.0,3.05,eq,h


In [194]:
# Write the filtered table to a CSV file in the current working directory
CSV.write("./california.csv", california)

"./california.csv"

In [195]:
# Write the same table to the data directory two levels above the working directory
CSV.write("../../data/california.csv", california)

"../../data/california.csv"

In [196]:
# Clean up: delete the extracted directory tree and the downloaded archive
extracted_dir = "output"
archive_file = "california.tar.gz"
rm(extracted_dir; recursive=true)
rm(archive_file)