# Uvoz podatkov


In [1]:
import pandas as pd
import numpy as np

https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html

## CSV

### Primer 1: seaslug.txt

In [3]:
! head data/seaslug.txt

Time	Percent
99	0.067
99	0.133
99	0.067
99	0
99	0
0	0.5
0	0.467
0	0.857
0	0.5


In [7]:
pd.read_csv("data/seaslug.txt", delimiter="\t").head()

Unnamed: 0,Time,Percent
0,99,0.067
1,99,0.133
2,99,0.067
3,99,0.0
4,99,0.0


- `sep`: str, default ‘,’: Character or regex pattern to treat as the delimiter
- `delimiter`: str, optional: Alias for sep.

### Primer 2: FOOD_DES.txt

In [10]:
! head data/FOOD_DES.txt

~01001~^~0100~^~Butter, salted~^~BUTTER,WITH SALT~^~~^~~^~Y~^~~^0^~~^6.38^4.27^8.79^3.87
~01002~^~0100~^~Butter, whipped, with salt~^~BUTTER,WHIPPED,W/ SALT~^~~^~~^~Y~^~~^0^~~^6.38^^^
~01003~^~0100~^~Butter oil, anhydrous~^~BUTTER OIL,ANHYDROUS~^~~^~~^~Y~^~~^0^~~^6.38^4.27^8.79^3.87
~01004~^~0100~^~Cheese, blue~^~CHEESE,BLUE~^~~^~~^~Y~^~~^0^~~^6.38^4.27^8.79^3.87
~01005~^~0100~^~Cheese, brick~^~CHEESE,BRICK~^~~^~~^~Y~^~~^0^~~^6.38^4.27^8.79^3.87
~01006~^~0100~^~Cheese, brie~^~CHEESE,BRIE~^~~^~~^~Y~^~~^0^~~^6.38^4.27^8.79^3.87
~01007~^~0100~^~Cheese, camembert~^~CHEESE,CAMEMBERT~^~~^~~^~Y~^~~^0^~~^6.38^4.27^8.79^3.87
~01008~^~0100~^~Cheese, caraway~^~CHEESE,CARAWAY~^~~^~~^~~^~~^0^~~^6.38^4.27^8.79^3.87
~01009~^~0100~^~Cheese, cheddar~^~CHEESE,CHEDDAR~^~~^~~^~Y~^~~^0^~~^^^^
~01010~^~0100~^~Cheese, cheshire~^~CHEESE,CHESHIRE~^~~^~~^~~^~~^0^~~^6.38^4.27^8.79^3.87


In [15]:
pd.read_csv("data/FOOD_DES.txt", sep="^", encoding="iso-8859-1", quotechar="~", header=None, nrows=10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,1001,100,"Butter, salted","BUTTER,WITH SALT",,,Y,,0,,6.38,4.27,8.79,3.87
1,1002,100,"Butter, whipped, with salt","BUTTER,WHIPPED,W/ SALT",,,Y,,0,,6.38,,,
2,1003,100,"Butter oil, anhydrous","BUTTER OIL,ANHYDROUS",,,Y,,0,,6.38,4.27,8.79,3.87
3,1004,100,"Cheese, blue","CHEESE,BLUE",,,Y,,0,,6.38,4.27,8.79,3.87
4,1005,100,"Cheese, brick","CHEESE,BRICK",,,Y,,0,,6.38,4.27,8.79,3.87
5,1006,100,"Cheese, brie","CHEESE,BRIE",,,Y,,0,,6.38,4.27,8.79,3.87
6,1007,100,"Cheese, camembert","CHEESE,CAMEMBERT",,,Y,,0,,6.38,4.27,8.79,3.87
7,1008,100,"Cheese, caraway","CHEESE,CARAWAY",,,,,0,,6.38,4.27,8.79,3.87
8,1009,100,"Cheese, cheddar","CHEESE,CHEDDAR",,,Y,,0,,,,,
9,1010,100,"Cheese, cheshire","CHEESE,CHESHIRE",,,,,0,,6.38,4.27,8.79,3.87


### Primer 3: mpls_stops.csv

In [16]:
! head data/mpls_stops.csv

Unnamed: 0,id Num,date,problem,MDC,citation Issued,person Search,vehicle Search,pre Race,race,gender,lat,long,police Precinct,neighborhood
,idNum,date,problem,MDC,citationIssued,personSearch,vehicleSearch,preRace,race,gender,lat,long,policePrecinct,neighborhood
6823.0,17-000003,2017-01-01 00:00:42,suspicious,MDC,,NO,NO,Unknown,Unknown,Unknown,44.96661711,-93.24645826,1,Cedar Riverside
6824.0,17-000007,2017-01-01 00:03:07,suspicious,MDC,,NO,NO,Unknown,Unknown,Male,44.98045,-93.27134,1,Downtown West
6825.0,17-000073,2017-01-01 00:23:15,traffic,MDC,,NO,NO,Unknown,White,Female,44.94835,-93.27538,5,Whittier
6826.0,17-000092,2017-01-01 00:33:48,suspicious,MDC,,NO,NO,Unknown,East African,Male,44.94836,-93.28135,5,Whittier
6827.0,17-000098,2017-01-01 00:37:58,traffic,MDC,,NO,NO,Unknown,White,Female,44.9790778,-93.26207579,1,Downtown West
6828.0,17-000111,2017-01-01 00:46:48,traffic,MDC,,NO,NO,Unknown,East African,Male,44.98053512,-93.26362691,1,Downtown West
6829.0,17-000114,2017-01-01 00:48:4

https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes

In [44]:
# uredimo imena stolpcev
mpls_columns = pd.read_csv("data/mpls_stops.csv", nrows=2).columns
mpls_columns = [name.lower().replace(" ", "_") for name in mpls_columns]
mpls_columns[0] = "case_number_id"

mpls = pd.read_csv("data/mpls_stops.csv",
                   names=mpls_columns,
                   nrows=10,
                   engine="c",
                   skiprows=2, 
                   dtype={
                       "lat": "float",
                       "long": "float"
                   },
                   na_values=["Unknown"], 
                   true_values=["YES"], 
                   false_values=["NO"],
                   parse_dates=["date"],
                   index_col="case_number_id",
                   date_format="%Y-%m-%d %H:%M:%S"
                )

#mpls.info()
mpls

Unnamed: 0_level_0,id_num,date,problem,mdc,citation_issued,person_search,vehicle_search,pre_race,race,gender,lat,long,police_precinct,neighborhood
case_number_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
6823.0,17-000003,2017-01-01 00:00:42,suspicious,MDC,,False,False,,,,44.966617,-93.246458,1,Cedar Riverside
6824.0,17-000007,2017-01-01 00:03:07,suspicious,MDC,,False,False,,,Male,44.98045,-93.27134,1,Downtown West
6825.0,17-000073,2017-01-01 00:23:15,traffic,MDC,,False,False,,White,Female,44.94835,-93.27538,5,Whittier
6826.0,17-000092,2017-01-01 00:33:48,suspicious,MDC,,False,False,,East African,Male,44.94836,-93.28135,5,Whittier
6827.0,17-000098,2017-01-01 00:37:58,traffic,MDC,,False,False,,White,Female,44.979078,-93.262076,1,Downtown West
6828.0,17-000111,2017-01-01 00:46:48,traffic,MDC,,False,False,,East African,Male,44.980535,-93.263627,1,Downtown West
6829.0,17-000114,2017-01-01 00:48:46,suspicious,MDC,,False,False,,Black,Male,44.980808,-93.273141,1,Downtown West
6830.0,17-000120,2017-01-01 00:50:55,traffic,MDC,,False,False,,Other,Female,44.982093,-93.238155,2,Marcy Holmes
6831.0,17-000127,2017-01-01 00:57:10,traffic,MDC,,False,False,,White,Male,44.990319,-93.252042,2,Nicollet Island - East Bank
6832.0,17-000139,2017-01-01 01:05:50,traffic,MDC,,False,False,,Black,Male,45.01327,-93.30824,4,Folwell


In [46]:
%timeit mpls = pd.read_csv('data/mpls_stops.csv', names=mpls_columns, skiprows=2, engine='python')

1.56 s ± 174 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [47]:
%timeit mpls = pd.read_csv('data/mpls_stops.csv', names=mpls_columns, skiprows=2, engine='c')

586 ms ± 18 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Primer: iperf.txt