In [1]:
pip install pandas



In [2]:
import pandas as pd

-----------------------------
## 1. SERIES
----------------------------

* A pandas series is like a single column of data in a spreadsheet.
* It is a one-dimensional array that can hold many types of data such as numbers, words or even other Python objects.
* Each value in a Series is associated with an index, which makes data retrieval and manipulation easy.

In [3]:
ser = pd.Series()
#empty series
print(ser)

Series([], dtype: object)


In [5]:
# creating series from numpy array

import pandas as pd
import numpy as np

data = np.array(['g', 'e', 'e', 'k', 's'])

ser = pd.Series(data)
print(ser)

0    g
1    e
2    e
3    k
4    s
dtype: object


In [6]:
# creating a series from python list

data_list = ['a', 'b', 'c', 'd', 'e']
ser = pd.Series(data_list)
print(ser)

0    a
1    b
2    c
3    d
4    e
dtype: object


In [7]:
# creating a series from python dictionary
# in this case index of series are keys of dict

data_dict = {
    'a' : 10,
    'b' : 20,
    'c' : 30
}

ser = pd.Series(data_dict)
print(ser)

a    10
b    20
c    30
dtype: int64


In [9]:
# creating series using range function

ser = pd.Series(range(5,15,2))
print(ser)

0     5
1     7
2     9
3    11
4    13
dtype: int64


In [12]:
# creating series using list comprehensions
# we can also manipulate index as per requirement

ser = pd.Series(range(1,6),index = [x for x in 'abcde'])
print(ser)

a    1
b    2
c    3
d    4
e    5
dtype: int64


In [13]:
import numpy as np

ser = pd.Series(np.linspace(1, 10, 5), index = [x for x in range(1,6)] )
print(ser)

1     1.00
2     3.25
3     5.50
4     7.75
5    10.00
dtype: float64


In [17]:
# Accessing series elements using index operator ser[i]

ser = pd.Series(range(0,10,2))
print(ser)

0    0
1    2
2    4
3    6
4    8
dtype: int64


In [25]:
print(f'''
0 index : {ser[0]}
------------------
0 to 2 index :
{ser[0:3]}
------------------
last element : {ser[-1:]}
------------------
last 2 elemnts :
{ser[-2:]}
''')


0 index : 0
------------------
0 to 2 index :
0    0
1    2
2    4
dtype: int64
------------------
last element : 4    8
dtype: int64
------------------
last 2 elemnts :
3    6
4    8
dtype: int64



----------------------------
## 2. DATAFRAMES
----------------------------
* A Pandas DataFrame is a two-dimensional table-like structure in Python where data is arranged in rows and columns.
* The main parts of a DataFrame are:
        Data: Actual values in the table.
        Rows: Labels that identify each row.
        Columns: Labels that define each data category.

In [3]:
lst = ['kuldeep', 'Vedant', 'Raghav', 'Rahul']

df = pd.DataFrame(lst)
print(df)

         0
0  kuldeep
1   Vedant
2   Raghav
3    Rahul


In [4]:
data = {'Name':['Tom', 'nick', 'krish', 'jack'],
        'Age':[20, 21, 19, 18]}

df = pd.DataFrame(data)
print(df)

    Name  Age
0    Tom   20
1   nick   21
2  krish   19
3   jack   18


In [5]:
import numpy as np
import pandas as pd

data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
df = pd.DataFrame(data, columns=['A', 'B', 'C'])
print(df)

   A  B  C
0  1  2  3
1  4  5  6
2  7  8  9


In [7]:
data = {
    'Car' : ['Verna', 'M5', 'Curve', 'BE-6'],
    'Company': ['Hyundai', 'BMW', 'Tata', 'Mahindra'],
    'Top_speed' : [160, 200, 150, 140]
}

df = pd.DataFrame(data)
print(df)

     Car   Company  Top_speed
0  Verna   Hyundai        160
1     M5       BMW        200
2  Curve      Tata        150
3   BE-6  Mahindra        140


## Working With Rows and Columns in Pandas DataFrame

### 1.Column Selection : selected by label
### 2.Row Selection : unique method loc()

In [14]:
data = {'Name':['Jai', 'Princi', 'Gaurav', 'Anuj'],
        'Age':[27, 24, 22, 32],
        'Address':['Delhi', 'Kanpur', 'Allahabad', 'Kannauj'],
        'Qualification':['Msc', 'MA', 'MCA', 'Phd']}

df = pd.DataFrame(data)
print(df[['Name', 'Age']])

     Name  Age
0     Jai   27
1  Princi   24
2  Gaurav   22
3    Anuj   32


In [17]:
    from google.colab import drive
    drive.mount('/content/drive')

Mounted at /content/drive


In [21]:
file_path = '/content/drive/MyDrive/Colab Notebooks/nba.csv'
df = pd.read_csv(file_path,index_col='Name')
# Display the first few rows of the DataFrame
print(df.head())

                         Team  Number Position   Age Height  Weight  \
Name                                                                  
Avery Bradley  Boston Celtics     0.0       PG  25.0    6-2   180.0   
Jae Crowder    Boston Celtics    99.0       SF  25.0    6-6   235.0   
John Holland   Boston Celtics    30.0       SG  27.0    6-5   205.0   
R.J. Hunter    Boston Celtics    28.0       SG  22.0    6-5   185.0   
Jonas Jerebko  Boston Celtics     8.0       PF  29.0   6-10   231.0   

                         College     Salary  
Name                                         
Avery Bradley              Texas  7730337.0  
Jae Crowder            Marquette  6796117.0  
John Holland   Boston University        NaN  
R.J. Hunter        Georgia State  1148640.0  
Jonas Jerebko                NaN  5000000.0  


In [22]:
first = df.loc["Avery Bradley"]
second = df.loc["R.J. Hunter"]

print(first, "\n\n\n", second)

Team        Boston Celtics
Number                 0.0
Position                PG
Age                   25.0
Height                 6-2
Weight               180.0
College              Texas
Salary           7730337.0
Name: Avery Bradley, dtype: object 


 Team        Boston Celtics
Number                28.0
Position                SG
Age                   22.0
Height                 6-5
Weight               185.0
College      Georgia State
Salary           1148640.0
Name: R.J. Hunter, dtype: object


In [23]:
# Acessing data using index operator[]
first = df["Age"]
print(first)

Name
Avery Bradley    25.0
Jae Crowder      25.0
John Holland     27.0
R.J. Hunter      22.0
Jonas Jerebko    29.0
                 ... 
Shelvin Mack     26.0
Raul Neto        24.0
Tibor Pleiss     26.0
Jeff Withey      26.0
NaN               NaN
Name: Age, Length: 458, dtype: float64


In [29]:
# Accesing data using iloc()
# iloc[] take int(index or row number - 1) whereas loc[] takes label (like name)
df.iloc[2]

Unnamed: 0,John Holland
Team,Boston Celtics
Number,30.0
Position,SG
Age,27.0
Height,6-5
Weight,205.0
College,Boston University
Salary,


In [26]:
# it checks the null data
df.isnull()

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Avery Bradley,False,False,False,False,False,False,False,False
Jae Crowder,False,False,False,False,False,False,False,False
John Holland,False,False,False,False,False,False,False,True
R.J. Hunter,False,False,False,False,False,False,False,False
Jonas Jerebko,False,False,False,False,False,False,True,False
...,...,...,...,...,...,...,...,...
Shelvin Mack,False,False,False,False,False,False,False,False
Raul Neto,False,False,False,False,False,False,True,False
Tibor Pleiss,False,False,False,False,False,False,True,False
Jeff Withey,False,False,False,False,False,False,False,False


* Filling null values with
        -> fillna()
        -> replace()
        -> interpolate()

In [28]:
import numpy as np

dict = {'First Score':[100, 90, np.nan, 95],
        'Second Score': [30, 45, 56, np.nan],
        'Third Score':[np.nan, 40, 80, 98]}
df1 = pd.DataFrame(dict)

df1.fillna(0)

Unnamed: 0,First Score,Second Score,Third Score
0,100.0,30.0,0.0
1,90.0,45.0,40.0
2,0.0,56.0,80.0
3,95.0,0.0,98.0


In [32]:
df1.replace(30,100)

Unnamed: 0,First Score,Second Score,Third Score
0,100.0,100.0,
1,90.0,45.0,40.0
2,,56.0,80.0
3,95.0,,98.0


In [36]:
data = {
    'Car' : ['Verna', 'M5', 'Curve', 'BE-6'],
    'Company': ['Hyundai', 'BMW', 'Tata', 'Mahindra'],
    'Top_speed' : [160, 200, 150, 140]
}

df2 = pd.DataFrame(data)

for i,j in df2.iterrows():
    print(i,j)
    print()


0 Car            Verna
Company      Hyundai
Top_speed        160
Name: 0, dtype: object

1 Car           M5
Company      BMW
Top_speed    200
Name: 1, dtype: object

2 Car          Curve
Company       Tata
Top_speed      150
Name: 2, dtype: object

3 Car              BE-6
Company      Mahindra
Top_speed         140
Name: 3, dtype: object



In [38]:
columns = list(df2) #returns the columns labels
for i in columns:
    print(df2[i][3])

BE-6
Mahindra
140


In [40]:
file_path = '/content/drive/MyDrive/Colab Notebooks/nba.csv'
df = pd.read_csv(file_path,index_col='Name')
# Display the first few rows of the DataFrame
print(df.head())

                         Team  Number Position   Age Height  Weight  \
Name                                                                  
Avery Bradley  Boston Celtics     0.0       PG  25.0    6-2   180.0   
Jae Crowder    Boston Celtics    99.0       SF  25.0    6-6   235.0   
John Holland   Boston Celtics    30.0       SG  27.0    6-5   205.0   
R.J. Hunter    Boston Celtics    28.0       SG  22.0    6-5   185.0   
Jonas Jerebko  Boston Celtics     8.0       PF  29.0   6-10   231.0   

                         College     Salary  
Name                                         
Avery Bradley              Texas  7730337.0  
Jae Crowder            Marquette  6796117.0  
John Holland   Boston University        NaN  
R.J. Hunter        Georgia State  1148640.0  
Jonas Jerebko                NaN  5000000.0  


In [41]:
# index operator
df[["Age", "College", "Salary"]]

Unnamed: 0_level_0,Age,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Avery Bradley,25.0,Texas,7730337.0
Jae Crowder,25.0,Marquette,6796117.0
John Holland,27.0,Boston University,
R.J. Hunter,22.0,Georgia State,1148640.0
Jonas Jerebko,29.0,,5000000.0
...,...,...,...
Shelvin Mack,26.0,Butler,2433333.0
Raul Neto,24.0,,900000.0
Tibor Pleiss,26.0,,2900000.0
Jeff Withey,26.0,Kansas,947276.0


In [42]:
# using labels loc[]
df.loc[["LeBron James", "Stephen Curry"]]

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
LeBron James,Cleveland Cavaliers,23.0,SF,31.0,6-8,250.0,,22970500.0
Stephen Curry,Golden State Warriors,30.0,PG,28.0,6-3,190.0,Davidson,11370786.0


In [43]:
#Rows and columns subset
df.loc[["LeBron James", "Kevin Durant"], ["Team", "Position", "Salary"]]

Unnamed: 0_level_0,Team,Position,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
LeBron James,Cleveland Cavaliers,SF,22970500.0
Kevin Durant,Oklahoma City Thunder,SF,20158622.0


In [44]:
# all rows and some columns
df.loc[:, ["Team", "Position", "Salary"]]

Unnamed: 0_level_0,Team,Position,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Avery Bradley,Boston Celtics,PG,7730337.0
Jae Crowder,Boston Celtics,SF,6796117.0
John Holland,Boston Celtics,SG,
R.J. Hunter,Boston Celtics,SG,1148640.0
Jonas Jerebko,Boston Celtics,PF,5000000.0
...,...,...,...
Shelvin Mack,Utah Jazz,PG,2433333.0
Raul Neto,Utah Jazz,PG,900000.0
Tibor Pleiss,Utah Jazz,C,2900000.0
Jeff Withey,Utah Jazz,C,947276.0


In [46]:
# few rows and few columns
df.loc["Avery Bradley":"Jonas Jerebko",["Team", "Position", "Salary"]]

Unnamed: 0_level_0,Team,Position,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Avery Bradley,Boston Celtics,PG,7730337.0
Jae Crowder,Boston Celtics,SF,6796117.0
John Holland,Boston Celtics,SG,
R.J. Hunter,Boston Celtics,SG,1148640.0
Jonas Jerebko,Boston Celtics,PF,5000000.0


In [47]:
# using iloc[] operator
df.iloc[3]

Unnamed: 0,R.J. Hunter
Team,Boston Celtics
Number,28.0
Position,SG
Age,22.0
Height,6-5
Weight,185.0
College,Georgia State
Salary,1148640.0


In [48]:
# slicing in iloc
df.iloc[0:5]

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
R.J. Hunter,Boston Celtics,28.0,SG,22.0,6-5,185.0,Georgia State,1148640.0
Jonas Jerebko,Boston Celtics,8.0,PF,29.0,6-10,231.0,,5000000.0


In [49]:
#rows and columns by position
df.iloc[0:5, 2:5]

Unnamed: 0_level_0,Position,Age,Height
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Avery Bradley,PG,25.0,6-2
Jae Crowder,SF,25.0,6-6
John Holland,SG,27.0,6-5
R.J. Hunter,SG,22.0,6-5
Jonas Jerebko,PF,29.0,6-10


-> Boolean indexing (single and multiple conditions)

In [51]:
df[df["Salary"] > 5000000]

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Avery Bradley,Boston Celtics,0.0,PG,25.0,6-2,180.0,Texas,7730337.0
Jae Crowder,Boston Celtics,99.0,SF,25.0,6-6,235.0,Marquette,6796117.0
Amir Johnson,Boston Celtics,90.0,PF,29.0,6-9,240.0,,12000000.0
Isaiah Thomas,Boston Celtics,4.0,PG,27.0,5-9,185.0,Washington,6912869.0
Jarrett Jack,Brooklyn Nets,2.0,PG,32.0,6-3,200.0,Georgia Tech,6300000.0
...,...,...,...,...,...,...,...,...
Gerald Henderson,Portland Trail Blazers,9.0,SG,28.0,6-5,215.0,Duke,6000000.0
Chris Kaman,Portland Trail Blazers,35.0,C,34.0,7-0,265.0,Central Michigan,5016000.0
Alec Burks,Utah Jazz,10.0,SG,24.0,6-6,214.0,Colorado,9463484.0
Derrick Favors,Utah Jazz,15.0,PF,24.0,6-10,265.0,Georgia Tech,12000000.0


In [52]:
df[(df["Age"] < 25) & (df["Salary"] > 3000000)]

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Marcus Smart,Boston Celtics,36.0,PG,22.0,6-4,220.0,Oklahoma State,3431040.0
Kristaps Porzingis,New York Knicks,6.0,PF,20.0,7-3,240.0,,4131720.0
Joel Embiid,Philadelphia 76ers,21.0,C,22.0,7-0,250.0,Kansas,4626960.0
Nerlens Noel,Philadelphia 76ers,4.0,PF,22.0,6-11,228.0,Kentucky,3457800.0
Jahlil Okafor,Philadelphia 76ers,8.0,C,20.0,6-11,275.0,Duke,4582680.0
Cory Joseph,Toronto Raptors,6.0,PG,24.0,6-3,190.0,Texas,7000000.0
Jonas Valanciunas,Toronto Raptors,17.0,C,24.0,7-0,255.0,,4660482.0
Harrison Barnes,Golden State Warriors,40.0,SF,24.0,6-8,225.0,North Carolina,3873398.0
Austin Rivers,Los Angeles Clippers,25.0,PG,23.0,6-4,200.0,Duke,3110796.0
Julius Randle,Los Angeles Lakers,30.0,PF,21.0,6-9,250.0,Kentucky,3132240.0


In [55]:
# UPDATING VALUES
df.loc["LeBron James", "Position"] = "Power Forward"
df.loc["LeBron James"]

Unnamed: 0,LeBron James
Team,Cleveland Cavaliers
Number,23.0
Position,Power Forward
Age,31.0
Height,6-8
Weight,250.0
College,
Salary,22970500.0


In [56]:
df.describe()

Unnamed: 0,Number,Age,Weight,Salary
count,457.0,457.0,457.0,446.0
mean,17.678337,26.938731,221.522976,4842684.0
std,15.96609,4.404016,26.368343,5229238.0
min,0.0,19.0,161.0,30888.0
25%,5.0,24.0,200.0,1044792.0
50%,13.0,26.0,220.0,2839073.0
75%,25.0,30.0,240.0,6500000.0
max,99.0,40.0,307.0,25000000.0


In [57]:
df.Weight.describe()

Unnamed: 0,Weight
count,457.0
mean,221.522976
std,26.368343
min,161.0
25%,200.0
50%,220.0
75%,240.0
max,307.0


In [59]:
df.Team.describe()

Unnamed: 0,Team
count,457
unique,30
top,New Orleans Pelicans
freq,19


In [61]:
df.Team.unique()

array(['Boston Celtics', 'Brooklyn Nets', 'New York Knicks',
       'Philadelphia 76ers', 'Toronto Raptors', 'Golden State Warriors',
       'Los Angeles Clippers', 'Los Angeles Lakers', 'Phoenix Suns',
       'Sacramento Kings', 'Chicago Bulls', 'Cleveland Cavaliers',
       'Detroit Pistons', 'Indiana Pacers', 'Milwaukee Bucks',
       'Dallas Mavericks', 'Houston Rockets', 'Memphis Grizzlies',
       'New Orleans Pelicans', 'San Antonio Spurs', 'Atlanta Hawks',
       'Charlotte Hornets', 'Miami Heat', 'Orlando Magic',
       'Washington Wizards', 'Denver Nuggets', 'Minnesota Timberwolves',
       'Oklahoma City Thunder', 'Portland Trail Blazers', 'Utah Jazz',
       nan], dtype=object)

In [63]:
df.Team.value_counts()

Unnamed: 0_level_0,count
Team,Unnamed: 1_level_1
New Orleans Pelicans,19
Memphis Grizzlies,18
New York Knicks,16
Milwaukee Bucks,16
Brooklyn Nets,15
Boston Celtics,15
Los Angeles Clippers,15
Los Angeles Lakers,15
Phoenix Suns,15
Sacramento Kings,15


In [64]:
avg_salary = df.Salary.mean()
print(avg_salary)

4842684.105381166


In [66]:
df.Salary.map(lambda x: x - avg_salary)

Unnamed: 0_level_0,Salary
Name,Unnamed: 1_level_1
Avery Bradley,2887653.0
Jae Crowder,1953433.0
John Holland,
R.J. Hunter,-3694044.0
Jonas Jerebko,157315.9


In [69]:
df.groupby('Team').Salary.min()

Unnamed: 0_level_0,Salary
Team,Unnamed: 1_level_1
Atlanta Hawks,525093.0
Boston Celtics,1148640.0
Brooklyn Nets,134215.0
Charlotte Hornets,189455.0
Chicago Bulls,525093.0
Cleveland Cavaliers,111196.0
Dallas Mavericks,525093.0
Denver Nuggets,258489.0
Detroit Pistons,111444.0
Golden State Warriors,289755.0


In [70]:
df.groupby('Team').Salary.max()

Unnamed: 0_level_0,Salary
Team,Unnamed: 1_level_1
Atlanta Hawks,18671659.0
Boston Celtics,12000000.0
Brooklyn Nets,19689000.0
Charlotte Hornets,13500000.0
Chicago Bulls,20093064.0
Cleveland Cavaliers,22970500.0
Dallas Mavericks,16407500.0
Denver Nuggets,14000000.0
Detroit Pistons,16000000.0
Golden State Warriors,15501000.0


In [71]:
df.groupby('Team').Salary.mean()

Unnamed: 0_level_0,Salary
Team,Unnamed: 1_level_1
Atlanta Hawks,4860197.0
Boston Celtics,4181505.0
Brooklyn Nets,3501898.0
Charlotte Hornets,5222728.0
Chicago Bulls,5785559.0
Cleveland Cavaliers,7642049.0
Dallas Mavericks,4746582.0
Denver Nuggets,4294424.0
Detroit Pistons,4477884.0
Golden State Warriors,5924600.0


In [74]:
df.groupby('Team').apply(lambda df : df.Number.iloc[0])

  df.groupby('Team').apply(lambda df : df.Number.iloc[0])


Unnamed: 0_level_0,0
Team,Unnamed: 1_level_1
Atlanta Hawks,24.0
Boston Celtics,0.0
Brooklyn Nets,44.0
Charlotte Hornets,5.0
Chicago Bulls,41.0
Cleveland Cavaliers,8.0
Dallas Mavericks,1.0
Denver Nuggets,0.0
Detroit Pistons,50.0
Golden State Warriors,19.0


In [78]:
df.groupby('Team').Height.agg([len, min, max])

  df.groupby('Team').Height.agg([len, min, max])
  df.groupby('Team').Height.agg([len, min, max])


Unnamed: 0_level_0,len,min,max
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Atlanta Hawks,15,6-1,7-3
Boston Celtics,15,5-9,7-0
Brooklyn Nets,15,5-11,7-0
Charlotte Hornets,15,6-1,7-1
Chicago Bulls,15,6-0,7-0
Cleveland Cavaliers,15,6-1,7-1
Dallas Mavericks,15,6-0,7-2
Denver Nuggets,15,6-0,7-0
Detroit Pistons,15,6-10,6-9
Golden State Warriors,15,6-10,7-0


In [80]:
df.groupby(['Team', 'College']).Salary.agg([len, min, max]).head()

  df.groupby(['Team', 'College']).Salary.agg([len, min, max]).head()
  df.groupby(['Team', 'College']).Salary.agg([len, min, max]).head()


Unnamed: 0_level_0,Unnamed: 1_level_0,len,min,max
Team,College,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Atlanta Hawks,Bucknell,1,947276.0,947276.0
Atlanta Hawks,Creighton,1,5746479.0,5746479.0
Atlanta Hawks,Florida,1,12000000.0,12000000.0
Atlanta Hawks,Kansas,1,2854940.0,2854940.0
Atlanta Hawks,Louisiana Tech,1,18671659.0,18671659.0


In [81]:
df.groupby(["Team", "College"])["Salary"].agg(["count", "min", "max", "mean"]).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,count,min,max,mean
Team,College,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Atlanta Hawks,Bucknell,1,947276.0,947276.0,947276.0
Atlanta Hawks,Creighton,1,5746479.0,5746479.0,5746479.0
Atlanta Hawks,Florida,1,12000000.0,12000000.0,12000000.0
Atlanta Hawks,Kansas,1,2854940.0,2854940.0,2854940.0
Atlanta Hawks,Louisiana Tech,1,18671659.0,18671659.0,18671659.0


In [85]:
df.sort_values(by = 'Salary').head()

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Thanasis Antetokounmpo,New York Knicks,43.0,SF,23.0,6-7,205.0,,30888.0
Orlando Johnson,New Orleans Pelicans,0.0,SG,27.0,6-5,220.0,UC Santa Barbara,55722.0
Phil Pressey,Phoenix Suns,25.0,PG,25.0,5-11,175.0,Missouri,55722.0
Alan Williams,Phoenix Suns,15.0,C,23.0,6-8,260.0,UC Santa Barbara,83397.0
Jordan McRae,Cleveland Cavaliers,12.0,SG,25.0,6-5,179.0,Tennessee,111196.0


In [92]:
df.sort_values(by = 'Salary', ascending=False).head()

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Kobe Bryant,Los Angeles Lakers,24.0,SF,37.0,6-6,212.0,,25000000.0
LeBron James,Cleveland Cavaliers,23.0,Power Forward,31.0,6-8,250.0,,22970500.0
Carmelo Anthony,New York Knicks,7.0,SF,32.0,6-8,240.0,Syracuse,22875000.0
Dwight Howard,Houston Rockets,12.0,C,30.0,6-11,265.0,,22359364.0
Chris Bosh,Miami Heat,1.0,PF,32.0,6-11,235.0,Georgia Tech,22192730.0


In [96]:
df.dtypes

Unnamed: 0,0
Team,object
Number,float64
Position,object
Age,float64
Height,object
Weight,float64
College,object
Salary,float64


In [97]:
df[pd.isnull(df['Salary'])]

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
John Holland,Boston Celtics,30.0,SG,27.0,6-5,205.0,Boston University,
Elton Brand,Philadelphia 76ers,42.0,PF,37.0,6-9,254.0,Duke,
Dahntay Jones,Cleveland Cavaliers,30.0,SG,35.0,6-6,225.0,Duke,
Jordan Farmar,Memphis Grizzlies,4.0,PG,29.0,6-2,180.0,UCLA,
Ray McCallum,Memphis Grizzlies,5.0,PG,24.0,6-3,190.0,Detroit,
Xavier Munford,Memphis Grizzlies,14.0,PG,24.0,6-3,180.0,Rhode Island,
Alex Stepheson,Memphis Grizzlies,35.0,PF,28.0,6-10,270.0,USC,
Briante Weber,Miami Heat,12.0,PG,23.0,6-2,165.0,Virginia Commonwealth,
Dorell Wright,Miami Heat,11.0,SF,30.0,6-9,205.0,,
Axel Toupane,Denver Nuggets,6.0,SG,23.0,6-7,210.0,,


In [101]:
df.Salary.fillna(0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df.Salary.fillna(0, inplace=True)


In [102]:
df[pd.isnull(df['Salary'])]

Unnamed: 0_level_0,Team,Number,Position,Age,Height,Weight,College,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
