## Reading multiple data files 

In [1]:
import pandas as pd
# Sales CSV files to load, read from the current working directory.
filenames = ['sales-jan-2015.csv','sales-feb-2015.csv']
# Will hold one DataFrame per entry of `filenames`, in the same order.
dataframes=[]

# Read each file with pandas and collect the resulting DataFrame.
for f in filenames:
    dataframes.append(pd.read_csv(f))

In [2]:
# Preview the first rows of every DataFrame that was loaded. Iterating over
# the list itself (instead of the hard-coded positions 0 and 1) keeps this
# cell correct when the number of loaded files changes.
for df in dataframes:
    print(df.head())

                  Date    Company   Product  Units
0  2015-01-21 19:13:21  Streeplex  Hardware     11
1  2015-01-09 05:23:51  Streeplex   Service      8
2  2015-01-06 17:19:34    Initech  Hardware     17
3  2015-01-02 09:51:06      Hooli  Hardware     16
4  2015-01-11 14:51:02      Hooli  Hardware     11
                  Date    Company   Product  Units
0  2015-02-26 08:57:45  Streeplex   Service      4
1  2015-02-16 12:09:19      Hooli  Software     10
2  2015-02-03 14:14:18    Initech  Software     13
3  2015-02-02 08:33:01      Hooli  Software      3
4  2015-02-25 00:29:00    Initech   Service     10


### Using list comprehensions 

In [3]:
# Build the list of DataFrames in a single expression: one per file name.
dataframes = [pd.read_csv(csv_path) for csv_path in filenames]

In [4]:
# Preview the first rows of the first DataFrame in the list.
dataframes[0].head()

Unnamed: 0,Date,Company,Product,Units
0,2015-01-21 19:13:21,Streeplex,Hardware,11
1,2015-01-09 05:23:51,Streeplex,Service,8
2,2015-01-06 17:19:34,Initech,Hardware,17
3,2015-01-02 09:51:06,Hooli,Hardware,16
4,2015-01-11 14:51:02,Hooli,Hardware,11


### Using glob 

`glob` returns every file whose name matches a wildcard pattern, which is convenient when many files share a naming scheme.

In [5]:
from glob import glob

In [6]:
# Match every file in the working directory whose name starts with 'sales'
# and ends with '.csv'. glob() does not guarantee any particular ordering
# (it depends on the file system), so sort the matches to make the order of
# `filenames` -- and therefore of `dataframes` -- reproducible.
filenames = sorted(glob('sales*.csv'))
dataframes = [pd.read_csv(f) for f in filenames]

In [7]:
# Show which file names are currently held in `filenames`.
print(filenames)

['sales-feb-2015.csv', 'sales-jan-2015.csv', 'sales-mar-2015.csv']


### Indexing 

In [8]:
# Load Gold.csv and use its 'Country' column as the row index.
gold = pd.read_csv('Gold.csv', index_col='Country')

In [9]:
# Preview the first rows of the gold table in file order.
gold.head()

Unnamed: 0_level_0,NOC,Total
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
United States,USA,2088.0
Soviet Union,URS,838.0
United Kingdom,GBR,498.0
France,FRA,378.0
Germany,GER,407.0


In [10]:
# Sort rows by index label (the 'Country' index) in ascending order, then preview.
gold.sort_index().head()

Unnamed: 0_level_0,NOC,Total
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
0,RU1,1.0
Afghanistan,AFG,
Algeria,ALG,4.0
Argentina,ARG,68.0
Armenia,ARM,1.0


In [11]:
# Sort rows by index label (the 'Country' index) in descending order, then preview.
gold.sort_index(ascending=False).head()

Unnamed: 0_level_0,NOC,Total
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Zimbabwe,ZIM,18.0
Zambia,ZAM,
Yugoslavia,YUG,143.0
West Germany,FRG,143.0
Virgin Islands*,ISV,


### Countries that have won Gold, Silver & Bronze medals in the Summer Olympics

In [12]:
# Load Silver.csv and Bronze.csv, each indexed by its 'Country' column.
silver = pd.read_csv('Silver.csv', index_col='Country')
bronze = pd.read_csv('Bronze.csv', index_col='Country')

In [13]:
# Summarize the gold table: number of index entries, column dtypes and
# non-null counts per column.
gold.info()

<class 'pandas.core.frame.DataFrame'>
Index: 138 entries, United States to United Arab Emirates
Data columns (total 2 columns):
NOC      138 non-null object
Total    96 non-null float64
dtypes: float64(1), object(1)
memory usage: 3.2+ KB


In [14]:
# Summarize the silver table: number of index entries, column dtypes and
# non-null counts per column.
silver.info()

<class 'pandas.core.frame.DataFrame'>
Index: 138 entries, United States to United Arab Emirates
Data columns (total 2 columns):
NOC      138 non-null object
Total    119 non-null float64
dtypes: float64(1), object(1)
memory usage: 3.2+ KB


In [15]:
# Summarize the bronze table: number of index entries, column dtypes and
# non-null counts per column.
bronze.info()

<class 'pandas.core.frame.DataFrame'>
Index: 138 entries, United States to United Arab Emirates
Data columns (total 2 columns):
NOC      138 non-null object
Total    119 non-null float64
dtypes: float64(1), object(1)
memory usage: 3.2+ KB


In [16]:
# Conform silver to bronze's row labels, then conform gold to that same set
# of labels, so both reindexed frames list their rows in bronze's order.
new_silver = silver.reindex(index=bronze.index)
new_gold = gold.reindex(index=new_silver.index)

In [17]:
# A country has won gold, silver AND bronze only if its 'Total' is non-null
# in all three tables. Dropping NaNs from the gold table alone would keep
# countries that have a gold total but no silver or bronze total.
# new_gold and new_silver were reindexed to bronze.index, so the three
# boolean masks share one index and combine row by row.
won_all_three = (
    new_gold['Total'].notnull()
    & new_silver['Total'].notnull()
    & bronze['Total'].notnull()
)
# dropna() still removes any remaining row with a missing value in the gold
# table (for example a missing NOC), as the original filter did.
all_medals = new_gold[won_all_three].dropna()

In [18]:
# Summarize the filtered table; the index entry count is the number of
# countries that survived the filter.
all_medals.info()

<class 'pandas.core.frame.DataFrame'>
Index: 96 entries, United States to United Arab Emirates
Data columns (total 2 columns):
NOC      96 non-null object
Total    96 non-null float64
dtypes: float64(1), object(1)
memory usage: 2.2+ KB


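Out of 138 countries, 96 have a non-null gold `Total`. A non-null gold total alone does not show that a country also won silver and bronze; that requires a non-null `Total` in all three tables.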

In [19]:
# Order the filtered countries by 'Total', largest first, and preview the top rows.
all_medals.sort_values(by='Total', ascending=False).head()

Unnamed: 0_level_0,NOC,Total
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
United States,USA,2088.0
Soviet Union,URS,838.0
United Kingdom,GBR,498.0
Italy,ITA,460.0
Germany,GER,407.0
