In [3]:
import pandas as pd 
import csv 

- filepath: Path of the file to be read.
- sep: Character(s) that are used as a field separator in the file.
- header: Index of the row containing the names of the columns (None if none).
- index_col: Index of the column or sequence of indexes that should be used as index of rows of the data.
- names: Sequence containing the names of the columns (used together with header = None).
- skiprows: Number of rows or sequence of row indexes to ignore in the load.
- na_values: Sequence of values that, if found in the file, should be treated as NaN.
- dtype: Dictionary in which the keys will be column names and the values will be types of NumPy to which their content must be converted.
- parse_dates: Flag that indicates if Python should try to parse data with a format similar to dates as dates. You can enter a list of column names that must be joined for the parsing as a date.
- date_parser: Function to use to try to parse dates.
- nrows: Number of rows to read from the beginning of the file.
- skip_footer: Number of rows to ignore at the end of the file.
- encoding: Encoding to be expected from the file read.
- squeeze: Flag that indicates that if the data read only contains one column the result is a Series instead of a DataFrame.
- thousands: Character to use to detect the thousands separator.
- decimal: Character to use to detect the decimal separator.
- skip_blank_lines: Flag that indicates whether blank lines should be ignored.

In [4]:
csv_url = "https://raw.githubusercontent.com/datasets/gdp/master/data/gdp.csv"

pd.read_csv(csv_url).head()

Unnamed: 0,Country Name,Country Code,Year,Value
0,Arab World,ARB,1968,25760680000.0
1,Arab World,ARB,1969,28434200000.0
2,Arab World,ARB,1970,31385500000.0
3,Arab World,ARB,1971,36426910000.0
4,Arab World,ARB,1972,43316060000.0


In [2]:
pd.read_csv?

In [5]:
df = pd.read_csv(
    'RDP-Reading-Data-with-Python-and-Pandas-master/unit-1-reading-data-with-python-and-pandas/lesson-1-reading-csv-and-txt-files/files/btc-market-price.csv',
    header=None
)

df.head()

Unnamed: 0,0,1
0,2/4/17 0:00,1099.169125
1,3/4/17 0:00,1141.813
2,4/4/17 0:00,?
3,5/4/17 0:00,1133.079314
4,6/4/17 0:00,-


In [8]:
df[1].unique()

array(['1099.169125', '1141.813', '?', '1133.079314', '-', '1181.149838',
       '1208.8005', '1207.744875', '1226.617038', '1218.92205',
       '1180.023713', '1185.260057', '1184.880671', '1186.927413',
       '1216.186743', '1217.930088', '1241.686325', '1258.361413',
       '1261.311225', '1257.988113', '1262.902775', '1279.414688',
       '1309.109875', '1345.353913', '1331.294429', '1334.979038',
       '1417.172813', '1452.076288', '1507.576857', '1508.292125',
       '1533.335071', '1560.4102', '1535.868429', '1640.619225',
       '1721.284971', '1762.88625', '1820.990563', '1720.4785',
       '1771.920013', '1776.3165', '1723.126938', '1739.031975',
       '1807.485063', '1899.082888', '1961.520488', '2052.909788',
       '2046.534463', '2090.662313', '2287.710288', '2379.193833',
       '2387.206286', '2211.976857', '2014.052963', '2192.9808',
       '2275.9307', '2239.205343', '2285.933914', '2399.242671',
       '2446.142414', '2525.765158', '2516.173143', '2698.313813',
  

In [9]:
df = pd.read_csv(
    'RDP-Reading-Data-with-Python-and-Pandas-master/unit-1-reading-data-with-python-and-pandas/lesson-1-reading-csv-and-txt-files/files/btc-market-price.csv',
    header=None,
    na_values=['','?','-']
)

df.head()

Unnamed: 0,0,1
0,2/4/17 0:00,1099.169125
1,3/4/17 0:00,1141.813
2,4/4/17 0:00,
3,5/4/17 0:00,1133.079314
4,6/4/17 0:00,


In [13]:
df = pd.read_csv(
    'RDP-Reading-Data-with-Python-and-Pandas-master/unit-1-reading-data-with-python-and-pandas/lesson-1-reading-csv-and-txt-files/files/btc-market-price.csv',
    header=None,
    na_values=['','?','-'],
    names=['Timestamp','Price']
)

df.head()

Unnamed: 0,Timestamp,Price
0,2/4/17 0:00,1099.169125
1,3/4/17 0:00,1141.813
2,4/4/17 0:00,
3,5/4/17 0:00,1133.079314
4,6/4/17 0:00,


In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Timestamp  365 non-null    object 
 1   Price      354 non-null    float64
dtypes: float64(1), object(1)
memory usage: 5.8+ KB


In [15]:
df['Timestamp'] = pd.to_datetime(df['Timestamp'])

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 365 entries, 0 to 364
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Timestamp  365 non-null    datetime64[ns]
 1   Price      354 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 5.8 KB


In [16]:
df.head()

Unnamed: 0,Timestamp,Price
0,2017-02-04,1099.169125
1,2017-03-04,1141.813
2,2017-04-04,
3,2017-05-04,1133.079314
4,2017-06-04,


In [17]:
df = pd.read_csv(
    'RDP-Reading-Data-with-Python-and-Pandas-master/unit-1-reading-data-with-python-and-pandas/lesson-1-reading-csv-and-txt-files/files/btc-market-price.csv',
    header=None,
    na_values=['','?','-'],
    names=['Timestamp','Price'],
    parse_dates=[0],
    dtype={'Price':'float'}
)

df.head()

Unnamed: 0,Timestamp,Price
0,2017-02-04,1099.169125
1,2017-03-04,1141.813
2,2017-04-04,
3,2017-05-04,1133.079314
4,2017-06-04,


In [19]:
df.dtypes

Timestamp    datetime64[ns]
Price               float64
dtype: object

# A more challenging parsing

In [20]:
exam_df = pd.read_csv(
    'RDP-Reading-Data-with-Python-and-Pandas-master/unit-1-reading-data-with-python-and-pandas/lesson-1-reading-csv-and-txt-files/files/exam_review.csv'
)

exam_df.head()

Unnamed: 0,Unnamed: 1,first_name>last_name>age>math_score>french_score
"Ray>Morley>18>""68","000"">""75","000"""
Melvin>Scott>24>77>83,,
Amirah>Haley>22>92>67,,
"Gerard>Mills>19>""78","000"">72",
Amy>Grimes>23>91>81,,


In [21]:
exam_df = pd.read_csv(
    'RDP-Reading-Data-with-Python-and-Pandas-master/unit-1-reading-data-with-python-and-pandas/lesson-1-reading-csv-and-txt-files/files/exam_review.csv',
    sep='>'
)

exam_df.head()

Unnamed: 0,first_name,last_name,age,math_score,french_score
0,Ray,Morley,18,68000,75000
1,Melvin,Scott,24,77,83
2,Amirah,Haley,22,92,67
3,Gerard,Mills,19,78000,72
4,Amy,Grimes,23,91,81


In [22]:
exam_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   first_name    5 non-null      object
 1   last_name     5 non-null      object
 2   age           5 non-null      int64 
 3   math_score    5 non-null      object
 4   french_score  5 non-null      object
dtypes: int64(1), object(4)
memory usage: 328.0+ bytes


In [24]:
exam_df = pd.read_csv(
    'RDP-Reading-Data-with-Python-and-Pandas-master/unit-1-reading-data-with-python-and-pandas/lesson-1-reading-csv-and-txt-files/files/exam_review.csv',
    sep='>',
    decimal=','
)

exam_df.head()

Unnamed: 0,first_name,last_name,age,math_score,french_score
0,Ray,Morley,18,68.0,75.0
1,Melvin,Scott,24,77.0,83.0
2,Amirah,Haley,22,92.0,67.0
3,Gerard,Mills,19,78.0,72.0
4,Amy,Grimes,23,91.0,81.0


In [25]:
exam_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   first_name    5 non-null      object 
 1   last_name     5 non-null      object 
 2   age           5 non-null      int64  
 3   math_score    5 non-null      float64
 4   french_score  5 non-null      float64
dtypes: float64(2), int64(1), object(2)
memory usage: 328.0+ bytes


## Saving file

In [26]:
exam_df.to_csv('out.csv')