In [2]:
using CSV, DataFrames, Dates

In [3]:
# Remote location of the text file that this notebook downloads and parses
# (presumably the ROMPLUS earthquake catalogue published by INFP — TODO confirm).
url = "http://www.infp.ro/data/romplus.txt"

"http://www.infp.ro/data/romplus.txt"

In [4]:
# `Base.download` is deprecated since Julia 1.6; the `Downloads` standard library
# is its supported replacement and likewise returns the path of the written file.
using Downloads

# Fetch the remote file and store it as `romania.txt` in the current directory.
Downloads.download(url, "./romania.txt")

"./romania.txt"

In [5]:
"""
    seismic_db_parser(filename)

Parse the fixed-width text file at `filename` and extract five fields per record.

The first line of the file is treated as a header and skipped. Blank (or
whitespace-only) lines are ignored. Every other line is sliced by character
position:

- characters 1–23: date/time, kept as a stripped string
- characters 38–45: latitude
- characters 48–56: longitude
- characters 76–80: depth
- characters 108–110: magnitude

# Returns
A 5-element vector of column vectors in the order
`[datetime, latitude, longitude, depth, magnitude]`; `datetime` is a
`Vector{String}` and the remaining four are `Vector{Float64}`.

# Throws
- `BoundsError` if a non-blank record is shorter than 110 characters.
- `ArgumentError` if a numeric field cannot be parsed as a `Float64`.
"""
function seismic_db_parser(filename)
    datetime = String[]
    latitude = Float64[]
    longitude = Float64[]
    depth = Float64[]
    magnitude = Float64[]

    open(filename) do io
        # `eachline` stops at the real end of file, so an empty string is no longer
        # needed as an EOF sentinel; `drop(…, 1)` discards the header line.
        for line in Iterators.drop(eachline(io), 1)
            # A blank line carries no record. Skip it instead of stopping, so that
            # records following it are not silently lost.
            isempty(strip(line)) && continue

            push!(datetime, strip(line[1:23]))
            push!(latitude, parse(Float64, strip(line[38:45])))
            push!(longitude, parse(Float64, strip(line[48:56])))
            push!(depth, parse(Float64, strip(line[76:80])))
            push!(magnitude, parse(Float64, strip(line[108:110])))
        end
    end

    # Callers index the result positionally (1 = datetime … 5 = magnitude).
    return [datetime, latitude, longitude, depth, magnitude]
end

seismic_db_parser (generic function with 1 method)

In [6]:
# Parse the downloaded file into five column vectors, in the order
# datetime, latitude, longitude, depth, magnitude.
features = seismic_db_parser("romania.txt") 

5-element Vector{Vector}:
 ["984/01/01  00:00:00.00", "1022/05/12  00:00:00.00", "1038/08/15  00:00:00.00", "1091/01/01  00:00:00.00", "1107/02/12  03:00:00.00", "1122/10/01  00:00:00.00", "1126/08/08  00:00:00.00", "1170/04/01  00:00:00.00", "1196/02/13  07:00:00.00", "1223/01/08  00:00:00.00"  …  "2022/12/28  07:38:54.27", "2022/12/28  13:15:05.47", "2022/12/29  02:06:28.32", "2022/12/29  09:28:11.27", "2022/12/29  09:42:33.92", "2022/12/29  12:16:03.08", "2022/12/29  13:11:43.57", "2022/12/30  14:23:49.39", "2022/12/31  03:16:48.32", "2022/12/31  15:24:39.55"]
 [45.7, 45.7, 45.7, 45.7, 45.7, 45.7, 45.7, 45.7, 45.7, 46.2  …  45.7505, 46.3193, 45.6714, 45.2972, 45.9596, 46.3226, 45.4378, 45.4639, 45.4082, 45.0022]
 [26.6, 26.6, 26.6, 26.6, 26.6, 26.6, 26.6, 26.6, 26.6, 24.4  …  26.6086, 23.1806, 26.6291, 25.093, 22.6972, 23.1438, 24.1116, 24.1433, 24.134, 22.6597]
 [150.0, 150.0, 150.0, 150.0, 150.0, 150.0, 150.0, 150.0, 150.0, 9.9  …  85.2, 2.0, 134.0, 2.0, 3.3, 1.0, 11.7, 14.0, 18.2

In [7]:
# Assemble the parsed column vectors into a table; the i-th name labels the
# i-th vector of `features`.
df = DataFrame(
    features,
    [:Datetime, :Latitude, :Longitude, :Depth, :Magnitude],
)

Row,Datetime,Latitude,Longitude,Depth,Magnitude
Unnamed: 0_level_1,String,Float64,Float64,Float64,Float64
1,984/01/01 00:00:00.00,45.7,26.6,150.0,7.1
2,1022/05/12 00:00:00.00,45.7,26.6,150.0,6.5
3,1038/08/15 00:00:00.00,45.7,26.6,150.0,7.3
4,1091/01/01 00:00:00.00,45.7,26.6,150.0,7.1
5,1107/02/12 03:00:00.00,45.7,26.6,150.0,7.1
6,1122/10/01 00:00:00.00,45.7,26.6,150.0,6.2
7,1126/08/08 00:00:00.00,45.7,26.6,150.0,7.1
8,1170/04/01 00:00:00.00,45.7,26.6,150.0,7.3
9,1196/02/13 07:00:00.00,45.7,26.6,150.0,7.5
10,1223/01/08 00:00:00.00,46.2,24.4,9.9,5.9


In [8]:
# Layout of the timestamp strings: date, two spaces, then time with a
# fractional-second part.
dateformat = DateFormat("yyyy/mm/dd  HH:MM:SS.s")

dateformat"yyyy/mm/dd  HH:MM:SS.s"

In [9]:
# Replace the textual timestamps with parsed `DateTime` values.
df.Datetime = [DateTime(stamp, dateformat) for stamp in df.Datetime]

32628-element Vector{DateTime}:
 0984-01-01T00:00:00
 1022-05-12T00:00:00
 1038-08-15T00:00:00
 1091-01-01T00:00:00
 1107-02-12T03:00:00
 1122-10-01T00:00:00
 1126-08-08T00:00:00
 1170-04-01T00:00:00
 1196-02-13T07:00:00
 1223-01-08T00:00:00
 ⋮
 2022-12-28T13:15:05.470
 2022-12-29T02:06:28.320
 2022-12-29T09:28:11.270
 2022-12-29T09:42:33.920
 2022-12-29T12:16:03.080
 2022-12-29T13:11:43.570
 2022-12-30T14:23:49.390
 2022-12-31T03:16:48.320
 2022-12-31T15:24:39.550

In [10]:
# Display the table to check the result of the Datetime column conversion.
df

Row,Datetime,Latitude,Longitude,Depth,Magnitude
Unnamed: 0_level_1,DateTime,Float64,Float64,Float64,Float64
1,0984-01-01T00:00:00,45.7,26.6,150.0,7.1
2,1022-05-12T00:00:00,45.7,26.6,150.0,6.5
3,1038-08-15T00:00:00,45.7,26.6,150.0,7.3
4,1091-01-01T00:00:00,45.7,26.6,150.0,7.1
5,1107-02-12T03:00:00,45.7,26.6,150.0,7.1
6,1122-10-01T00:00:00,45.7,26.6,150.0,6.2
7,1126-08-08T00:00:00,45.7,26.6,150.0,7.1
8,1170-04-01T00:00:00,45.7,26.6,150.0,7.3
9,1196-02-13T07:00:00,45.7,26.6,150.0,7.5
10,1223-01-08T00:00:00,46.2,24.4,9.9,5.9


In [13]:
# Inspect the rows whose magnitude is exactly 0.0.
df[df.Magnitude .== 0.0, :]

Row,Datetime,Latitude,Longitude,Depth,Magnitude
Unnamed: 0_level_1,DateTime,Float64,Float64,Float64,Float64
1,1802-10-26T20:30:00,45.7,26.6,99.9,0.0
2,1802-10-27T01:00:00,45.7,26.6,99.9,0.0
3,1802-10-28T00:00:00,45.7,26.6,99.9,0.0
4,1802-11-07T00:00:00,45.7,26.6,99.9,0.0
5,1829-07-01T01:37:00,47.6,22.3,9.9,0.0
6,1829-11-26T17:00:00,45.7,26.6,99.9,0.0
7,1838-01-25T01:00:00,45.7,26.6,99.9,0.0
8,1838-02-10T03:00:00,45.7,26.6,99.9,0.0
9,1855-01-23T23:30:00,45.5,25.6,9.9,0.0
10,1876-01-15T05:00:00,47.9,23.9,9.9,0.0


In [14]:
# Keep only the rows with a strictly positive magnitude (as a new table).
romania = filter(:Magnitude => >(0.0), df)

Row,Datetime,Latitude,Longitude,Depth,Magnitude
Unnamed: 0_level_1,DateTime,Float64,Float64,Float64,Float64
1,0984-01-01T00:00:00,45.7,26.6,150.0,7.1
2,1022-05-12T00:00:00,45.7,26.6,150.0,6.5
3,1038-08-15T00:00:00,45.7,26.6,150.0,7.3
4,1091-01-01T00:00:00,45.7,26.6,150.0,7.1
5,1107-02-12T03:00:00,45.7,26.6,150.0,7.1
6,1122-10-01T00:00:00,45.7,26.6,150.0,6.2
7,1126-08-08T00:00:00,45.7,26.6,150.0,7.1
8,1170-04-01T00:00:00,45.7,26.6,150.0,7.3
9,1196-02-13T07:00:00,45.7,26.6,150.0,7.5
10,1223-01-08T00:00:00,46.2,24.4,9.9,5.9


In [15]:
# Persist the filtered table as CSV; the expression evaluates to the output path.
romania |> CSV.write("../data/romania.csv")

"../data/romania.csv"