# Video: Reading and Writing Data Frames in Pandas

This code example shows how to read a tab-separated file into a pandas data frame with `read_csv` and how to write a data frame back out to a file with `to_csv`.

In [None]:
import pandas as pd

In [None]:
# Tab-separated file of daily TMAX observations, fetched straight from GitHub.
boston_tmax_url = "https://raw.githubusercontent.com/bu-cds-omds/dx602-examples/main/data/boston-TMAX.tsv"

# read_csv splits on commas by default; sep="\t" makes it split on tabs instead.
boston = pd.read_csv(boston_tmax_url, sep="\t")
boston

Unnamed: 0,stations,date,element,value,mflag,qflag,sflag
0,USC00198368,1994-07-01,TMAX,283,,,Z
1,USC00198368,1994-07-02,TMAX,289,,,Z
2,USC00198368,1994-07-03,TMAX,283,,,Z
3,USC00198368,1994-07-04,TMAX,289,,,Z
4,USC00198368,1994-07-05,TMAX,278,,,Z
...,...,...,...,...,...,...,...
10854,USC00198368,2024-05-12,TMAX,133,,,H
10855,USC00198368,2024-05-13,TMAX,150,,,H
10856,USC00198368,2024-05-14,TMAX,206,,,H
10857,USC00198368,2024-05-15,TMAX,228,,,H


In [None]:
# Summarize the frame: row count, column names, non-null counts, and dtypes.
boston.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10859 entries, 0 to 10858
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   stations  10859 non-null  object
 1   date      10859 non-null  object
 2   element   10859 non-null  object
 3   value     10859 non-null  int64 
 4   mflag     10859 non-null  object
 5   qflag     10859 non-null  object
 6   sflag     10859 non-null  object
dtypes: int64(1), object(6)
memory usage: 594.0+ KB


In [None]:
# Smallest and largest raw readings in the value column.
raw_values = boston["value"]
raw_values.min(), raw_values.max()

(-139, 394)

In [None]:
# The raw readings look like tenths of a degree (presumably the GHCN-Daily
# convention -- confirm against the data source), so divide by 10 to rescale.
boston["tmax"] = boston["value"].div(10)
boston["tmax"]

0        28.3
1        28.9
2        28.3
3        28.9
4        27.8
         ... 
10854    13.3
10855    15.0
10856    20.6
10857    22.8
10858    20.6
Name: tmax, Length: 10859, dtype: float64

In [None]:
# tmax now carries the rescaled readings, so remove the raw value column.
# drop() returns a new frame, which is rebound to the same name.
boston = boston.drop(columns=["value"])
boston.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10859 entries, 0 to 10858
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   stations  10859 non-null  object 
 1   date      10859 non-null  object 
 2   element   10859 non-null  object 
 3   mflag     10859 non-null  object 
 4   qflag     10859 non-null  object 
 5   sflag     10859 non-null  object 
 6   tmax      10859 non-null  float64
dtypes: float64(1), object(6)
memory usage: 594.0+ KB


In [None]:
# Keep only the two columns that will be written out.
wanted_columns = ["date", "tmax"]
boston = boston.loc[:, wanted_columns]
boston

Unnamed: 0,date,tmax
0,1994-07-01,28.3
1,1994-07-02,28.9
2,1994-07-03,28.3
3,1994-07-04,28.9
4,1994-07-05,27.8
...,...,...
10854,2024-05-12,13.3
10855,2024-05-13,15.0
10856,2024-05-14,20.6
10857,2024-05-15,22.8


In [None]:
# First attempt: with index=True (the default), the row index is written
# as an extra, unnamed leading column.
boston.to_csv("boston.tsv", sep="\t", index=True)

In [None]:
# Peek at the first lines of the file to see exactly what was written.
!head boston.tsv

	date	tmax
0	1994-07-01	28.3
1	1994-07-02	28.9
2	1994-07-03	28.3
3	1994-07-04	28.9
4	1994-07-05	27.8
5	1994-07-06	32.8
6	1994-07-07	33.3
7	1994-07-08	32.2
8	1994-07-09	31.1


In [None]:
# Second attempt: index=False leaves the row index out of the file.
boston.to_csv(path_or_buf="boston.tsv", sep="\t", index=False)

In [None]:
# Peek at the rewritten file: it now starts with the date column.
!head boston.tsv

date	tmax
1994-07-01	28.3
1994-07-02	28.9
1994-07-03	28.3
1994-07-04	28.9
1994-07-05	27.8
1994-07-06	32.8
1994-07-07	33.3
1994-07-08	32.2
1994-07-09	31.1
