# Pandas populating the DataFrame

In [2]:
import pandas as pd
import numpy as np

Use the **data** parameter to populate a DataFrame when it is created

In [3]:
data = [['Joe','Pavelski','SJ','C',1000000],
        ['Jonathan','Toews','CHI','C',2000000],
        ['Carey','Price','G','MTL',1000000],
        ['Erik','Karlsson','D','SJ',2000000]]
column_names = ['first_name',
                'last_name',
                'team',
                'position',
                'salary']
players = pd.DataFrame(data, columns = column_names)
print(players)
players.dtypes

  first_name last_name team position   salary
0        Joe  Pavelski   SJ        C  1000000
1   Jonathan     Toews  CHI        C  2000000
2      Carey     Price    G      MTL  1000000
3       Erik  Karlsson    D       SJ  2000000


first_name    object
last_name     object
team          object
position      object
salary         int64
dtype: object

Use **append** to add rows after a DataFrame is created

In [4]:
players = players.append({'first_name':'Jean-Gabriel',
                          'last_name':'Pageau',
                          'team':'SEN',
                          'position':'C',
                          'salary':1000000},
                          ignore_index=True)
players

Unnamed: 0,first_name,last_name,team,position,salary
0,Joe,Pavelski,SJ,C,1000000
1,Jonathan,Toews,CHI,C,2000000
2,Carey,Price,G,MTL,1000000
3,Erik,Karlsson,D,SJ,2000000
4,Jean-Gabriel,Pageau,SEN,C,1000000


Use **read_csv** to create and populate a DataFrame from a csv file

In [5]:
players = pd.read_csv('hockey_players.csv')
players

Unnamed: 0,first_name,last_name,team,position,jersey_number,salary,birthdate
0,Joe,Pavelski,SJ,C,8,6000000.0,1984-07-11
1,Connor,McDavid,EDM,C,97,925000.0,1997-01-13
2,Sidney,Crosby,PIT,C,87,8700000.0,1987-08-07
3,Carey,Price,MTL,G,31,10500000.0,1987-08-16
4,Daniel,Sedin,VAN,LW,22,,1980-09-26
5,Henrik,Sedin,VAN,C,33,,1980-09-26


By default any rows with errors will prevent any data from loading

In [6]:
players = pd.read_csv('hockey_players_with_errors.csv')
players

ParserError: Error tokenizing data. C error: Expected 7 fields in line 4, saw 8


Specify **error_bad_lines=False** to skip bad rows

In [7]:
players = pd.read_csv('hockey_players_with_errors.csv',
                     error_bad_lines=False)
players

b'Skipping line 4: expected 7 fields, saw 8\n'


Unnamed: 0,first_name,last_name,team,position,jersey_number,salary,birthdate
0,Joe,Pavelski,SJ,C,8,6000000.0,1984-07-11
1,Connor,McDavid,EDM,C,97,925000.0,1997-01-13
2,Carey,Price,MTL,G,31,10500000.0,1987-08-16
3,Daniel,Sedin,VAN,LW,22,,1980-09-26
4,Henrik,Sedin,VAN,C,33,,1980-09-26


You can also connect directly to a SQL Database and use **read_sql()** to populate your DataFrame using a query

In [21]:
import pyodbc

server = 'confoonhl.database.windows.net'
database = 'NHLdb'
username = 'sqladmin'
password = 'P@ssw0rd'
driver= '{ODBC Driver 17 for SQL Server}'
sql_conn = pyodbc.connect('DRIVER=' + driver +
                          ';SERVER=' + server +
                          ';PORT=1433;DATABASE=' + database +
                          ';UID=' + username +
                          ';PWD=' + password)

query = 'SELECT first_name, last_name, team, position, jersey_number, salary, birthdate FROM dbo.players'
players = pd.read_sql(query, sql_conn)

players

Unnamed: 0,first_name,last_name,team,position,jersey_number,salary,birthdate
0,Joe,Pavelski,SJ,C,8,6000000.0,1984-07-11
1,Connor,McDavid,EDM,C,97,925000.0,1997-01-13
2,Sidney,Crosby,PIT,C,87,87000000.0,1987-08-07
3,Daniel,Sedin,VAN,LW,22,,1980-09-26
4,Henrik,Sedin,VAN,C,33,,1980-09-26
