### Series
- 1D array-like object that can hold many data types. One of the main differences between Pandas Series and NumPy ndarrays is that you can assign an index label to each element in the Pandas Series. Another big difference is that Pandas Series can hold data of different data types.

In [1]:
import pandas as pd

In [2]:
# Keys become the index labels. Mixing ints and strings gives the Series dtype `object`.
groceries = pd.Series(
    {'eggs': 30, 'apples': 6, 'milk': 'Yes', 'bread': 'No'}
)
groceries

eggs       30
apples      6
milk      Yes
bread      No
dtype: object

In [3]:
groceries['eggs']

30

In [17]:
# Positional access: use .iloc. Plain [] with an integer on a label-indexed
# Series is deprecated in recent pandas (it will be treated as a label lookup).
groceries.iloc[0]

30

In [4]:
groceries['bread']

'No'

In [5]:
# Accessing Elements
# using index labels:
# single index label
print('How many eggs do we need to buy:', groceries['eggs'])

How many eggs do we need to buy: 30


In [None]:
# access multiple index labels
print('Do we need milk and bread:\n', groceries[['milk', 'bread']])

In [8]:
groceries

eggs       30
apples      6
milk      Yes
bread      No
dtype: object

In [7]:
groceries[['eggs', 'apples']]

eggs      30
apples     6
dtype: object

In [None]:

# use loc to access multiple index labels
print('How many eggs and apples do we need to buy:\n', groceries.loc[['eggs', 'apples']])

In [9]:
groceries.loc[['eggs', 'apples']]

eggs      30
apples     6
dtype: object

In [11]:
# Access elements in groceries by position (multiple numerical indices).
# .iloc makes the positional lookup explicit; plain [] with integers on a
# label-indexed Series is deprecated in recent pandas.
print('How many eggs and apples do we need to buy:\n', groceries.iloc[[0, 1]])

How many eggs and apples do we need to buy:
 eggs      30
apples     6
dtype: object


In [10]:
# Select the first two elements by position; .iloc avoids the deprecated
# integer-in-[] fallback on a label-indexed Series.
groceries.iloc[[0, 1]]

eggs      30
apples     6
dtype: object

In [13]:
# Use a negative numerical index: -1 is the last element.
# .iloc keeps this a positional lookup regardless of the index labels.
print('Do we need bread:\n', groceries.iloc[[-1]])

Do we need bread:
 bread    No
dtype: object


In [12]:
# Last element by position, returned as a one-element Series (list indexer).
groceries.iloc[[-1]]

bread    No
dtype: object

In [None]:
# Use a single numerical index. A scalar position returns the element itself.
# .iloc is required for positional access on a label-indexed Series in recent pandas.
print('How many eggs do we need to buy:', groceries.iloc[0])

In [16]:
# Positional slice (end-exclusive): the elements at positions 0 and 1.
# Written with .iloc so it reads consistently with the other positional lookups.
groceries.iloc[0:2]

eggs      30
apples     6
dtype: object

In [None]:
# use iloc (stands for integer location) to access multiple numerical indices
print('Do we need milk and bread:\n', groceries.iloc[[2, 3]])

In [18]:
groceries

eggs       30
apples      6
milk      Yes
bread      No
dtype: object

In [15]:
groceries.iloc[[2, 3]]

milk     Yes
bread     No
dtype: object

In [19]:
groceries

eggs       30
apples      6
milk      Yes
bread      No
dtype: object

In [20]:
groceries['eggs'] = 2
groceries

eggs        2
apples      6
milk      Yes
bread      No
dtype: object

In [21]:
groceries

eggs        2
apples      6
milk      Yes
bread      No
dtype: object

In [25]:
# drop() returns a new Series; `groceries` itself is left unchanged.
groceries_without_apples = groceries.drop('apples')
groceries_without_apples

eggs       2
milk     Yes
bread     No
dtype: object

In [26]:
groceries

eggs        2
apples      6
milk      Yes
bread      No
dtype: object

In [28]:
# inplace=True removes 'apples' from `groceries` itself and returns None,
# so this cell displays nothing.
# NOTE(review): not idempotent. Re-running this cell raises KeyError because
# the 'apples' label is already gone.
groceries.drop('apples', inplace = True)

In [29]:
groceries

eggs       2
milk     Yes
bread     No
dtype: object

In [None]:
##### Create series from Python dictionary

In [30]:
# Dict keys become the index labels; the mixed int/float values are stored as float64.
my_dictionary = dict(a=45., b=-19.5, c=4444)
my_second_series = pd.Series(my_dictionary)
my_second_series

a      45.0
b     -19.5
c    4444.0
dtype: float64

In [None]:
##Access a series like a dictionary

In [31]:
my_second_series['b']

-19.5

In [32]:
my_second_series.get('a')

45.0

In [33]:
my_second_series['E'] = 20

In [34]:
my_second_series

a      45.0
b     -19.5
c    4444.0
E      20.0
dtype: float64

### DataFrame
- Pandas DataFrames are two-dimensional data structures with labeled rows and columns, that can hold many data types.

In [35]:
# Create a dictionary of Pandas Series, one per column of the final frame.
# The DataFrame constructor aligns them on the union of their index labels;
# a label missing from one Series shows up as NaN in that column.
items = {
    'Bob': pd.Series({'bike': 245, 'pants': 25, 'watch': 55}),
    'Alice': pd.Series({'book': 40, 'glasses': 110, 'bike': 500, 'pants': 45}),
}

shopping_carts = pd.DataFrame(items)

In [36]:
shopping_carts

Unnamed: 0,Bob,Alice
bike,245.0,500.0
book,,40.0
glasses,,110.0
pants,25.0,45.0
watch,55.0,


In [37]:
players = {
    'P1_Points_per_game': [7,8,9,9,10,10,11,11,12,13],
    'P2_Points_per_game': [7,9,9,10,10,10,10,11,11,13],
    'P3_Points_per_game': [1,4,6,6,10,10,10,11,12,30]
}

In [38]:
df = pd.DataFrame(players)
df

Unnamed: 0,P1_Points_per_game,P2_Points_per_game,P3_Points_per_game
0,7,7,1
1,8,9,4
2,9,9,6
3,9,10,6
4,10,10,10
5,10,10,10
6,11,10,10
7,11,11,11
8,12,11,12
9,13,13,30


#### Loading Data into DF

In [39]:
# Load the Titanic passenger data into a DataFrame.
# pandas is already imported as `pd` in the first cell, so it is not re-imported here.
# The path is relative: 'Titanic.csv' must sit next to the notebook.
df = pd.read_csv('Titanic.csv')
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [44]:
df.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [40]:
df.shape

(891, 12)

In [43]:
df.head(7)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S


In [46]:
df.tail(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [47]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


#### Basic statistics

In [48]:
df.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


##### selection using column name

In [50]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [53]:
df[['Age']].describe()

Unnamed: 0,Age
count,714.0
mean,29.699118
std,14.526497
min,0.42
25%,20.125
50%,28.0
75%,38.0
max,80.0


In [55]:
df[['Sex', 'Age']]

Unnamed: 0,Sex,Age
0,male,22.0
1,female,38.0
2,female,26.0
3,female,35.0
4,male,35.0
...,...,...
886,male,27.0
887,female,19.0
888,female,
889,male,26.0


In [None]:
df['Age']

In [None]:
type(df['Age'])

In [None]:
df[['Age']]

In [None]:
type(df[['Age']])

In [None]:
df[['Survived', 'Fare']]

##### selection using slice
- remember: up to, but not including second index

In [56]:
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [57]:
df[1:4]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S


##### Selection by label

In [None]:
df.loc[:,['Name','Age']]

In [62]:
# .loc slices by label and includes BOTH endpoints: rows labelled 1 through 4.
df2 = df.loc[1:4, ['Name', 'Age']]

In [63]:
df2

Unnamed: 0,Name,Age
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",38.0
2,"Heikkinen, Miss. Laina",26.0
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0
4,"Allen, Mr. William Henry",35.0


In [58]:
df.loc[:,['Name','Age']]

Unnamed: 0,Name,Age
0,"Braund, Mr. Owen Harris",22.0
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",38.0
2,"Heikkinen, Miss. Laina",26.0
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0
4,"Allen, Mr. William Henry",35.0
...,...,...
886,"Montvila, Rev. Juozas",27.0
887,"Graham, Miss. Margaret Edith",19.0
888,"Johnston, Miss. Catherine Helen ""Carrie""",
889,"Behr, Mr. Karl Howell",26.0


In [71]:
df.loc[3:5,['Name','Age']]

Unnamed: 0,Name,Age
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0
4,"Allen, Mr. William Henry",35.0
5,"Moran, Mr. James",


In [64]:
df.loc[3:5,['Name','Age']]

Unnamed: 0,Name,Age
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",35.0
4,"Allen, Mr. William Henry",35.0
5,"Moran, Mr. James",


In [65]:
df.loc[5:7,['Name','Age']]

Unnamed: 0,Name,Age
5,"Moran, Mr. James",
6,"McCarthy, Mr. Timothy J",54.0
7,"Palsson, Master. Gosta Leonard",2.0


##### integer slices


In [66]:
# .iloc is strictly positional, so passing column *labels* is an error.
# The error is the point of this cell: catch it and print it so that
# Restart & Run All does not stop here.
try:
    df.iloc[5:7, ['Name', 'Age']]
except IndexError as err:
    print(f'{type(err).__name__}: {err}')

IndexError: .iloc requires numeric indexers, got ['Name' 'Age']

In [67]:
df.head(2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C


In [72]:
# .iloc slices by position and excludes the end: rows 5-6, first three columns.
df3 = df.iloc[5:7, :3]

In [73]:
df3

Unnamed: 0,PassengerId,Survived,Pclass
5,6,0,3
6,7,0,1


In [75]:
df.iloc[5  ,:]

PassengerId                   6
Survived                      0
Pclass                        3
Name           Moran, Mr. James
Sex                        male
Age                         NaN
SibSp                         0
Parch                         0
Ticket                   330877
Fare                     8.4583
Cabin                       NaN
Embarked                      Q
Name: 5, dtype: object

##### Boolean Indexing

In [76]:
df.head(2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C


In [None]:
# Two ways to select a single column:
#   df['first name']  - bracket notation works for any column label, including ones with spaces
#   df.first_name     - attribute access only works when the label is a valid Python identifier

In [77]:
df['Age']

0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
       ... 
886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: Age, Length: 891, dtype: float64

In [79]:
df.Age

0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
       ... 
886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: Age, Length: 891, dtype: float64

In [81]:
df.head(2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C


In [83]:
df.Age

0      22.0
1      38.0
2      26.0
3      35.0
4      35.0
       ... 
886    27.0
887    19.0
888     NaN
889    26.0
890    32.0
Name: Age, Length: 891, dtype: float64

In [82]:
df.Age >= 50

0      False
1      False
2      False
3      False
4      False
       ...  
886    False
887    False
888    False
889    False
890    False
Name: Age, Length: 891, dtype: bool

In [85]:
df4 = df[df.Age >= 50]

In [91]:
df.shape

(891, 12)

In [90]:
df4.shape

(74, 12)

In [87]:
df[df.Age >= 50]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
11,12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.5500,C103,S
15,16,1,2,"Hewlett, Mrs. (Mary D Kingcome)",female,55.0,0,0,248706,16.0000,,S
33,34,0,2,"Wheadon, Mr. Edward H",male,66.0,0,0,C.A. 24579,10.5000,,S
54,55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65.0,0,1,113509,61.9792,B30,C
...,...,...,...,...,...,...,...,...,...,...,...,...
820,821,1,1,"Hays, Mrs. Charles Melville (Clara Jennings Gr...",female,52.0,1,1,12749,93.5000,B69,S
829,830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62.0,0,0,113572,80.0000,B28,
851,852,0,3,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.7750,,S
857,858,1,1,"Daly, Mr. Peter Denis",male,51.0,0,0,113055,26.5500,E17,S


In [89]:
df['Age'] >= 50

0      False
1      False
2      False
3      False
4      False
       ...  
886    False
887    False
888    False
889    False
890    False
Name: Age, Length: 891, dtype: bool

In [None]:
# Pattern: df[<boolean mask>] keeps only the rows where the mask is True.
# The mask is a boolean Series such as the result of df['Age'] >= 50.

In [88]:
df[df['Age'] >= 50]

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
11,12,1,1,"Bonnell, Miss. Elizabeth",female,58.0,0,0,113783,26.5500,C103,S
15,16,1,2,"Hewlett, Mrs. (Mary D Kingcome)",female,55.0,0,0,248706,16.0000,,S
33,34,0,2,"Wheadon, Mr. Edward H",male,66.0,0,0,C.A. 24579,10.5000,,S
54,55,0,1,"Ostby, Mr. Engelhart Cornelius",male,65.0,0,1,113509,61.9792,B30,C
...,...,...,...,...,...,...,...,...,...,...,...,...
820,821,1,1,"Hays, Mrs. Charles Melville (Clara Jennings Gr...",female,52.0,1,1,12749,93.5000,B69,S
829,830,1,1,"Stone, Mrs. George Nelson (Martha Evelyn)",female,62.0,0,0,113572,80.0000,B28,
851,852,0,3,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.7750,,S
857,858,1,1,"Daly, Mr. Peter Denis",male,51.0,0,0,113055,26.5500,E17,S


#### Sort by data within a column

In [98]:
df.sort_values(by=['Age'], ascending=False)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
630,631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0,0,0,27042,30.0000,A23,S
851,852,0,3,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.7750,,S
493,494,0,1,"Artagaveytia, Mr. Ramon",male,71.0,0,0,PC 17609,49.5042,,C
96,97,0,1,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C
116,117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.7500,,Q
...,...,...,...,...,...,...,...,...,...,...,...,...
859,860,0,3,"Razi, Mr. Raihed",male,,0,0,2629,7.2292,,C
863,864,0,3,"Sage, Miss. Dorothy Edith ""Dolly""",female,,8,2,CA. 2343,69.5500,,S
868,869,0,3,"van Melkebeke, Mr. Philemon",male,,0,0,345777,9.5000,,S
878,879,0,3,"Laleff, Mr. Kristo",male,,0,0,349217,7.8958,,S


In [99]:
df.sort_values(by=['Age', 'Fare','Survived'], ascending=False)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
630,631,1,1,"Barkworth, Mr. Algernon Henry Wilson",male,80.0,0,0,27042,30.0000,A23,S
851,852,0,3,"Svensson, Mr. Johan",male,74.0,0,0,347060,7.7750,,S
493,494,0,1,"Artagaveytia, Mr. Ramon",male,71.0,0,0,PC 17609,49.5042,,C
96,97,0,1,"Goldschmidt, Mr. George B",male,71.0,0,0,PC 17754,34.6542,A5,C
116,117,0,3,"Connors, Mr. Patrick",male,70.5,0,0,370369,7.7500,,Q
...,...,...,...,...,...,...,...,...,...,...,...,...
481,482,0,2,"Frost, Mr. Anthony Wood ""Archie""",male,,0,0,239854,0.0000,,S
633,634,0,1,"Parr, Mr. William Henry Marsh",male,,0,0,112052,0.0000,,S
674,675,0,2,"Watson, Mr. Ennis Hastings",male,,0,0,239856,0.0000,,S
732,733,0,2,"Knight, Mr. Robert J",male,,0,0,239855,0.0000,,S


In [None]:
df.sort_values(by=['Age'], ascending=False)

# Ex1 - Filtering and Sorting Data

- Step 1. Import the necessary libraries
- Step 2. Import the dataset from this address: https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/02_Filtering_%26_Sorting/Euro12/Euro_2012_stats_TEAM.csv (hint: `pd.read_csv(url, sep=',')`)

- Step 3. Assign it to a variable called euro12.
- Step 4. Select only the Goal column.
- Step 5. How many teams participated in Euro 2012?
- Step 6. What is the number of columns in the dataset?
- Step 7. View only the columns Team, Yellow Cards and Red Cards and assign them to a dataframe called discipline
- Step 8. Sort the teams by Red Cards, then by Yellow Cards
- Step 9. Calculate the mean Yellow Cards given per Team
- Step 10. Filter teams that scored more than 6 goals
- Step 11. Select the first 7 columns
- Step 12. Select all columns except the last 3.
- Step 13. Present only the Shooting Accuracy from England, Italy and Russia

### Grouping
Grouping ("group by") is a process involving one or more of the following steps:
- Splitting the data into groups based on some criteria
- Applying a function to each group independently
- Combining the results into a data structure

In [None]:
### Group by sex
df.groupby('Sex')

In [None]:
# Sum every numeric column per sex. numeric_only=True skips the text columns
# (Name, Ticket, Cabin, ...), which recent pandas would otherwise "sum" by
# concatenating the strings.
df.groupby('Sex').sum(numeric_only=True)

In [None]:
# Total fare per sex. Select the column BEFORE aggregating so that only 'Fare'
# is summed, instead of summing every column and discarding the rest.
df.groupby('Sex')['Fare'].sum()

In [None]:
df.groupby('Sex')['Age']

In [None]:
df.groupby('Sex')['Age'].max()

In [None]:
df.groupby(['Sex', 'Embarked'])['Fare']

In [None]:
df.groupby(['Sex', 'Embarked'])['Fare'].sum()

# Ex2 - GroupBy

- Step 1. Import the necessary libraries
- Step 2. Import the dataset from this address. ('https://raw.githubusercontent.com/justmarkham/DAT8/master/data/drinks.csv')
- Step 3. Assign it to a variable called drinks.
- Step 4. Which continent drinks the most beer on average?
- Step 5. For each continent print the statistics for wine consumption.
- Step 6. Print the mean alcohol consumption per continent for every column
- Step 7. Print the median alcohol consumption per continent for every column

### Merge &  Concatenate

#### Merge

In [None]:
# Two small frames sharing the join column `my_key`. Their key values overlap
# only partly (K0-K3 are common; 'k40' is only in df1, 'k5'/'k6' only in df2).
df1 = pd.DataFrame(
    {
        'my_key': ['K0', 'K1', 'K2', 'K3', 'k40'],
        'A': ['A0', 'A1', 'A2', 'A3', 'A40'],
        'B': ['B0', 'B1', 'B2', 'B3', 'B40'],
    }
)
df2 = pd.DataFrame(
    {
        'my_key': ['K1', 'K3', 'K0', 'K2', 'k5', 'k6'],
        'C': ['C0', 'C1', 'C2', 'C3', 'C5', 'C6'],
        'D': ['D0', 'D1', 'D2', 'D3', 'D5', 'D6'],
    }
)

In [None]:
df1

In [None]:
df2

In [None]:
# Inner join: keep only the rows whose `my_key` value appears in BOTH df1 and df2.
inner_joined_df = pd.merge(df1, df2, on='my_key', how='inner')
inner_joined_df

In [None]:
# Right join: keep every row of df2 (the right frame); columns coming from df1
# are NaN where df2's `my_key` has no match in df1.
right_outer_joined_df = pd.merge(df1, df2, on='my_key', how='right')
right_outer_joined_df

In [None]:
# Left join: keep every row of df1 (the left frame); columns coming from df2
# are NaN where df1's `my_key` has no match in df2.
left_outer_joined_df = pd.merge(df1, df2, on='my_key', how='left')
left_outer_joined_df

In [None]:
# Full outer join: keep every `my_key` value found in either frame; columns from
# the frame that lacks a given key are NaN for that row.
full_outer_joined_df = pd.merge(df1, df2, on='my_key', how='outer')
full_outer_joined_df

#### Concatenate

In [None]:
df1

In [None]:
df2

In [None]:
# Stack df2's rows underneath df1's; each frame keeps its own row labels.
# A dedicated name is used instead of `df`, which holds the Titanic data that
# the earlier selection/grouping cells rely on.
stacked_df = pd.concat([df1, df2])
stacked_df

In [None]:
# rename() returns a renamed copy. The result is not assigned here, so df2
# itself still has columns C and D afterwards.
df2.rename(columns={"C": "A", "D": "B"})

In [None]:
df2

In [None]:
# inplace=True renames the columns on df2 itself (C -> A, D -> B) and returns None.
# NOTE(review): this mutates df2 for the rest of the session. The merge cells
# above will see the renamed columns if they are re-run after this cell.
df2.rename(columns={"C": "A", "D": "B"}, inplace = True)

In [None]:
df2

In [None]:
pd.concat([df1, df2])


In [None]:
# ignore_index=True discards the original row labels and renumbers the result 0..n-1.
# A dedicated name is used instead of `df`, which holds the Titanic data that
# the earlier selection/grouping cells rely on.
stacked_reindexed_df = pd.concat([df1, df2], ignore_index=True)
stacked_reindexed_df