## Basic imports

In [1]:
import pandas as pd

## Sample data

In [2]:
# Column-oriented sample data: every keyword becomes one column, and the
# i-th entries of the three lists together describe project i.
projects = dict(
    title=list('abcdefgh'),
    technology=['C#', 'C#', 'Python', 'Python', 'Java', 'JavaScript', 'Python', 'Java'],
    lines=[1000, 4000, 500, 750, 15000, 2500, 6000, 4500],
)

In [3]:
# Build the DataFrame from the dict: each key becomes a column and the rows
# get a default integer index (0..7).
df = pd.DataFrame(projects)

In [4]:
# Show the full frame; it has only 8 rows, so no .head() is needed.
df

Unnamed: 0,title,technology,lines
0,a,C#,1000
1,b,C#,4000
2,c,Python,500
3,d,Python,750
4,e,Java,15000
5,f,JavaScript,2500
6,g,Python,6000
7,h,Java,4500


In [5]:
# Summary of the frame: index range, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   title       8 non-null      object
 1   technology  8 non-null      object
 2   lines       8 non-null      int64 
dtypes: int64(1), object(2)
memory usage: 324.0+ bytes


## Filter Rows by Column Value

In [6]:
# Bracket access with a single column name returns that column as a Series.
df['lines']

0     1000
1     4000
2      500
3      750
4    15000
5     2500
6     6000
7     4500
Name: lines, dtype: int64

In [7]:
# Comparing a Series with a scalar is element-wise and yields a boolean
# Series (a "mask"). The comparison is strict, so the 500-line row is False.
df['lines'] > 500

0     True
1     True
2    False
3     True
4     True
5     True
6     True
7     True
Name: lines, dtype: bool

In [8]:
# Indexing the frame with a boolean mask keeps only the rows where the mask
# is True; the original index labels are preserved (label 2 is missing below).
df[df['lines'] > 500]

Unnamed: 0,title,technology,lines
0,a,C#,1000
1,b,C#,4000
3,d,Python,750
4,e,Java,15000
5,f,JavaScript,2500
6,g,Python,6000
7,h,Java,4500


In [9]:
# Same filter written with attribute access (df.lines) instead of df['lines'].
# Attribute access only works when the column name is a valid Python
# identifier that does not clash with an existing DataFrame attribute.
df[df.lines > 500]

Unnamed: 0,title,technology,lines
0,a,C#,1000
1,b,C#,4000
3,d,Python,750
4,e,Java,15000
5,f,JavaScript,2500
6,g,Python,6000
7,h,Java,4500


## Matching strings

In [10]:
# .str.contains is a substring match, so 'JavaScript' is selected together
# with 'Java'. The pattern is interpreted as a regular expression by default;
# pass regex=False to match it literally.
df[df.technology.str.contains('Java')]

Unnamed: 0,title,technology,lines
4,e,Java,15000
5,f,JavaScript,2500
7,h,Java,4500


In [11]:
# .str.endswith only looks at the end of each string, which excludes
# 'JavaScript' and keeps the two 'Java' rows.
df[df.technology.str.endswith('Java')]

Unnamed: 0,title,technology,lines
4,e,Java,15000
7,h,Java,4500


In [12]:
# Exact equality: keeps only rows whose technology is exactly 'Java'.
df[df.technology == 'Java']

Unnamed: 0,title,technology,lines
4,e,Java,15000
7,h,Java,4500


In [13]:
# Exact-match filter on 'Java' using bracket column access (df['technology']).
df[df['technology'] == 'Java']

Unnamed: 0,title,technology,lines
4,e,Java,15000
7,h,Java,4500


## Multiple criteria

In [14]:
# Combine two boolean masks with `&` (element-wise AND), one condition per
# line. Each condition sits in its own parentheses because `&` binds tighter
# than comparison operators such as `<`.
# The size limit is a strict `<`, consistent with the other `lines < 5000`
# filters in this section.
df[(
    (df.technology.str.contains('Java'))
    &
    (df.lines < 5000)
)]

Unnamed: 0,title,technology,lines
5,f,JavaScript,2500
7,h,Java,4500


In [15]:
# One-line form of a two-condition filter. The parentheses around
# (df.lines < 5000) are required because `&` binds tighter than `<`;
# the .str.contains(...) call is already a single expression and needs none.
df[df.technology.str.contains('Java') & (df.lines < 5000)]

Unnamed: 0,title,technology,lines
5,f,JavaScript,2500
7,h,Java,4500


In [16]:
# Pitfall (intentionally wrong): without parentheses around the comparison,
# Python parses this as `(contains_mask & df.lines) < 5000`, because `&`
# binds tighter than `<`. Every value of the combined expression is below
# 5000 for this data, so the mask is True everywhere and ALL rows come back.
df[df.technology.str.contains('Java') & df.lines < 5000]

Unnamed: 0,title,technology,lines
0,a,C#,1000
1,b,C#,4000
2,c,Python,500
3,d,Python,750
4,e,Java,15000
5,f,JavaScript,2500
6,g,Python,6000
7,h,Java,4500


In [17]:
# Single-condition baseline: rows with fewer than 5000 lines, whatever the
# technology.
df[df.lines < 5000]

Unnamed: 0,title,technology,lines
0,a,C#,1000
1,b,C#,4000
2,c,Python,500
3,d,Python,750
5,f,JavaScript,2500
7,h,Java,4500


In [18]:
# `|` is element-wise OR. The two technology masks are OR-ed inside their own
# parentheses first, then AND-ed with the size mask.
# .eq('Python') is the method form of `== 'Python'`.
df[(df.technology.str.contains('Java') | df.technology.eq('Python')) & (df.lines < 5000)]

Unnamed: 0,title,technology,lines
2,c,Python,500
3,d,Python,750
5,f,JavaScript,2500
7,h,Java,4500


## The `query()` method

In [19]:
# query() takes the filter as a string: bare names refer to columns, string
# values are quoted inside the expression, and `and` combines conditions.
df.query('technology == "Python" and lines > 500')

Unnamed: 0,title,technology,lines
3,d,Python,750
6,g,Python,6000


In [20]:
# `or` inside query(): a row is kept when either condition holds, so the
# 4500-line Java row and the 6000-line Python row both appear.
df.query('technology == "Java" or lines > 5000')

Unnamed: 0,title,technology,lines
4,e,Java,15000
6,g,Python,6000
7,h,Java,4500


## The `filter()` method

In [21]:
# filter() selects by LABEL, not by cell value. With axis=1 the items are
# column names, so only 'title' and 'lines' are kept.
df.filter(items=['title', 'lines'], axis=1)

Unnamed: 0,title,lines
0,a,1000
1,b,4000
2,c,500
3,d,750
4,e,15000
5,f,2500
6,g,6000
7,h,4500


In [22]:
# With axis=0 the items are looked up among the ROW labels (0..7 here).
# 'title' and 'lines' are not row labels, so nothing matches and the result
# is an empty frame that still has all three columns.
df.filter(items=['title', 'lines'], axis=0)

Unnamed: 0,title,technology,lines


In [23]:
# axis=0 with labels that exist in the index keeps the rows labelled 1, 2, 3.
df.filter(items=[1, 2, 3], axis=0)

Unnamed: 0,title,technology,lines
1,b,C#,4000
2,c,Python,500
3,d,Python,750


In [24]:
# like='t' keeps labels that contain the substring 't'. No axis is given, and
# the match is applied to the column names here: 'title' and 'technology'
# are kept, 'lines' is dropped.
df.filter(like='t')

Unnamed: 0,title,technology
0,a,C#
1,b,C#
2,c,Python
3,d,Python
4,e,Java
5,f,JavaScript
6,g,Python
7,h,Java
