# Day 3: Load Datasets from CSV


## Loading CSV files into Python using the CSV.reader() function in the standard library
* File must be downloaded locally.

In [1]:
import csv
with open("constituents.csv", mode = "r") as file:
    csv_file = csv.reader(file)
    for text in csv_file:
        print(text)

['Symbol', 'Security', 'GICS Sector', 'GICS Sub-Industry', 'Headquarters Location', 'Date added', 'CIK', 'Founded']
['MMM', '3M', 'Industrials', 'Industrial Conglomerates', 'Saint Paul, Minnesota', '1957-03-04', '66740', '1902']
['AOS', 'A. O. Smith', 'Industrials', 'Building Products', 'Milwaukee, Wisconsin', '2017-07-26', '91142', '1916']
['ABT', 'Abbott', 'Health Care', 'Health Care Equipment', 'North Chicago, Illinois', '1957-03-04', '1800', '1888']
['ABBV', 'AbbVie', 'Health Care', 'Biotechnology', 'North Chicago, Illinois', '2012-12-31', '1551152', '2013 (1888)']
['ACN', 'Accenture', 'Information Technology', 'IT Consulting & Other Services', 'Dublin, Ireland', '2011-07-06', '1467373', '1989']
['ADBE', 'Adobe Inc.', 'Information Technology', 'Application Software', 'San Jose, California', '1997-05-05', '796343', '1982']
['AMD', 'Advanced Micro Devices', 'Information Technology', 'Semiconductors', 'Santa Clara, California', '2017-03-20', '2488', '1969']
['AES', 'AES Corporation', 

## Load CSV files using NumPy and the numpy.loadtxt() function

In [2]:
import numpy as np

# Local file
array = np.loadtxt("alias.csv", delimiter=",", dtype=str)
print(array)

[['Country' 'Name' 'NameWoDiacritics']
 ['AE' 'Ruwais = Ar Ruways' 'Ruwais = Ar Ruways']
 ['AO' 'Novo Redondo = Sumbe' 'Novo Redondo = Sumbe']
 ['AO' 'San Antonio do Zaire = Soyo' 'San Antonio do Zaire = Soyo']
 ['AT' 'Vienna = Wien' 'Vienna = Wien']
 ['BE' 'Antwerp = Antwerpen' 'Antwerp = Antwerpen']
 ['CA' 'Three Rivers = Trois-RiviÃ¨res' 'Three Rivers = Trois-Rivieres']
 ['CH' 'Geneva = GenÃ¨ve' 'Geneva = Geneve']
 ['CL' 'Easter Island = Isla de Pascua' 'Easter Island = Isla de Pascua']
 ['CN' 'Amoy = Xiamen Gaoqi International Apt'
  'Amoy = Xiamen Gaoqi International Apt']
 ['CN' 'Canton = Guangzhou Baiyun International Apt'
  'Canton = Guangzhou Baiyun International Apt']
 ['CN' 'Changjiagang = Zhangjiagang' 'Changjiagang = Zhangjiagang']
 ['CN' 'Chefoo = Yantai Laishan International Apt'
  'Chefoo = Yantai Laishan International Apt']
 ['CN' 'Chinkiang = Zhenjiang' 'Chinkiang = Zhenjiang']
 ['CN' 'Dairen = Dalian Zhoushuizi International Apt'
  'Dairen = Dalian Zhoushuizi Interna

In [3]:
# Online file
url = "https://raw.githubusercontent.com/datasets/un-locode/main/data/alias.csv"
array = np.loadtxt(url, delimiter=",", dtype=str)
print(array)

[['Country' 'Name' 'NameWoDiacritics']
 ['AE' 'Ruwais = Ar Ruways' 'Ruwais = Ar Ruways']
 ['AO' 'Novo Redondo = Sumbe' 'Novo Redondo = Sumbe']
 ['AO' 'San Antonio do Zaire = Soyo' 'San Antonio do Zaire = Soyo']
 ['AT' 'Vienna = Wien' 'Vienna = Wien']
 ['BE' 'Antwerp = Antwerpen' 'Antwerp = Antwerpen']
 ['CA' 'Three Rivers = Trois-RiviÃ¨res' 'Three Rivers = Trois-Rivieres']
 ['CH' 'Geneva = GenÃ¨ve' 'Geneva = Geneve']
 ['CL' 'Easter Island = Isla de Pascua' 'Easter Island = Isla de Pascua']
 ['CN' 'Amoy = Xiamen Gaoqi International Apt'
  'Amoy = Xiamen Gaoqi International Apt']
 ['CN' 'Canton = Guangzhou Baiyun International Apt'
  'Canton = Guangzhou Baiyun International Apt']
 ['CN' 'Changjiagang = Zhangjiagang' 'Changjiagang = Zhangjiagang']
 ['CN' 'Chefoo = Yantai Laishan International Apt'
  'Chefoo = Yantai Laishan International Apt']
 ['CN' 'Chinkiang = Zhenjiang' 'Chinkiang = Zhenjiang']
 ['CN' 'Dairen = Dalian Zhoushuizi International Apt'
  'Dairen = Dalian Zhoushuizi Interna

## Load CSV files using Pandas and the pandas.read_csv() function

In [4]:
import pandas as pd

# Local file
df = pd.read_csv("world-cities.csv")
print(df)

                      name               country             subcountry  \
0             les Escaldes               Andorra     Escaldes-Engordany   
1         Andorra la Vella               Andorra       Andorra la Vella   
2       Umm Al Quwain City  United Arab Emirates  Imārat Umm al Qaywayn   
3      Ras Al Khaimah City  United Arab Emirates        Raʼs al Khaymah   
4               Zayed City  United Arab Emirates              Abu Dhabi   
...                    ...                   ...                    ...   
26462             Bulawayo              Zimbabwe               Bulawayo   
26463              Bindura              Zimbabwe    Mashonaland Central   
26464           Beitbridge              Zimbabwe     Matabeleland South   
26465              Epworth              Zimbabwe                 Harare   
26466          Chitungwiza              Zimbabwe                 Harare   

       geonameid  
0        3040051  
1        3041563  
2         290594  
3         291074  
4   

In [5]:
# Online file
url = "https://raw.githubusercontent.com/datasets/world-cities/master/data/world-cities.csv"
df = pd.read_csv(url)
print(df)

                      name               country             subcountry  \
0             les Escaldes               Andorra     Escaldes-Engordany   
1         Andorra la Vella               Andorra       Andorra la Vella   
2       Umm Al Quwain City  United Arab Emirates  Imārat Umm al Qaywayn   
3      Ras Al Khaimah City  United Arab Emirates        Raʼs al Khaymah   
4               Zayed City  United Arab Emirates              Abu Dhabi   
...                    ...                   ...                    ...   
26462             Bulawayo              Zimbabwe               Bulawayo   
26463              Bindura              Zimbabwe    Mashonaland Central   
26464           Beitbridge              Zimbabwe     Matabeleland South   
26465              Epworth              Zimbabwe                 Harare   
26466          Chitungwiza              Zimbabwe                 Harare   

       geonameid  
0        3040051  
1        3041563  
2         290594  
3         291074  
4   