#  **Pandas Series**

In [15]:
# Importing Pandas
import pandas as pd 

In [16]:
# This code creates a pandas Series containing the elements [10, 88, 31, 62, 69].
pd.Series([10,88,31,62,69])

0    10
1    88
2    31
3    62
4    69
dtype: int64

In [17]:
# We assign the code to the value x
x = pd.Series([10,88,31,62,69])
x

0    10
1    88
2    31
3    62
4    69
dtype: int64

In [18]:
# axes method in pandas is used to return the list of row axis labels for the given data structure.
x.axes

# The dtype method in pandas is used to return the data type of the elements in a pandas data structure, such as a Series or DataFrame.
x.dtype

# The size method in pandas is used to return the number of elements in a pandas data structure, such as a Series or DataFrame.
x.size

# The ndim method in pandas is used to return the number of dimensions (axes) of a pandas data structure, such as a Series or DataFrame.
x.ndim

# The values method in pandas is used to return the underlying data of a pandas data structure, such as a Series or DataFrame, as a NumPy array.
x.values

# The keys method in pandas is used to return the index (row) labels of a pandas Series or the column labels of a pandas DataFrame
x.keys

# The head method in pandas is used to view the first few rows of a pandas data structure, such as a Series or DataFrame.
x.head

# The tail method in pandas is used to return the last n rows of a pandas data structure, such as a Series or DataFrame.
x.tail

<bound method NDFrame.tail of 0    10
1    88
2    31
3    62
4    69
dtype: int64>

# **Index Nomenclature**

In [19]:
# This code creates a pandas Series with values [99, 97, 31, 23, 42] and assigns custom index labels [1, 3, 5, 7, 9] to the corresponding elements.
pd.Series([99,97,31,23,42], index=[1,3,5,7,9]) 

# similarly, we can also assign letters to the index values
pd.Series([99,97,31,23,42], index=["a","b","c","d","e"]) 

a    99
b    97
c    31
d    23
e    42
dtype: int64

# **Creating a List from a Dictionary**

In [20]:
# This code creates a Python dictionary with keys 'reg', 'log', and 'cart', and corresponding values 10, 11, and 12. 
# Then, it converts the dictionary into a pandas Series, where the dictionary keys become the index labels of the Series, 
# and the dictionary values become the elements of the Series.
dictionary = {"reg":10, "log":11, "cart":12}
x = pd.Series(dictionary)
x

reg     10
log     11
cart    12
dtype: int64

# **Employee Inquiry**

In [21]:
# is reg in the dictionary
"reg" in dictionary

True

In [22]:
# is REG in the dictionary
"REG" in dictionary

False

# **Creating a Pandas DataFrame**

In [23]:
# l = [1, 2, 39, 67, 90]: This line creates a Python list called l containing the elements [1, 2, 39, 67, 90].
l = [1,2,39,67,90]

# pd.DataFrame(l, columns=["x"]): This line creates a pandas DataFrame using the list l as data and assigns the column name "x" to the single column 
# in the DataFrame.
pd.DataFrame(l, columns=["x"])

Unnamed: 0,x
0,1
1,2
2,39
3,67
4,90


In [24]:
# This code imports the NumPy library as np, creates a 1-dimensional array m using np.arange(1, 10), and then reshapes it into a 3x3 matrix using reshape((3,3)). 
# After that, it creates a pandas DataFrame from the matrix m with column names "var1", "var2", and "var3".
import numpy as np
m = np.arange(1,10).reshape((3,3))
pd.DataFrame(m, columns=["var1","var2","var3"])

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


# **DataFrame Nomenclature**

In [25]:
# This code assigns new column names to a DataFrame. It changes the column names from their current names to "deg1", "deg2", and "deg3". 
df = pd.DataFrame(m, columns=["var1","var2","var3"])
df

Unnamed: 0,var1,var2,var3
0,1,2,3
1,4,5,6
2,7,8,9


# **Element Operations With DataFrame**

In [30]:
# Creates a NumPy array containing 5 integers between 0 and 9 randomly.
s1 = np.random.randint(10, size =5)
s2 = np.random.randint(10, size =5)
s3 = np.random.randint(10, size =5)

# This code creates a dictionary named "dictionary" with three key-value pairs, where "var1," "var2," and "var3" are keys, and s1, s2, and s3 are associated values.
dictionary ={"var1" : s1, "var2":s2, "var3":s3}
dictionary

# This code creates a pandas DataFrame from a dictionary and then displays the DataFrame.
df = pd.DataFrame(dictionary)
df


Unnamed: 0,var1,var2,var3
0,8,7,7
1,9,5,2
2,8,1,5
3,1,4,9
4,5,8,3


In [33]:
# This code uses the drop method on a DataFrame df to remove the row with index label 1 along the 0-axis (rows). The inplace=True parameter is set, 
# which means the deletion will be done in-place, making it a permanent change
df.drop(0, axis = 0 , inplace = True) # inplace ile sildigimiz satırı kalıcı halde silmis oluyoruz..
df

Unnamed: 0,var1,var2,var3
2,8,1,5
3,1,4,9
4,5,8,3


In [37]:
# This code defines a list l containing the elements [3, 4]. Then, it drops the rows with index labels 3 and 4 from the 
# DataFrame df along the 0-axis (rows axis) and returns the resulting DataFrame.
l = [3,4]
df.drop(l, axis=0)

Unnamed: 0,var1,var2,var3
2,8,1,5


In [39]:
# "We are using a for loop to check if the values we assigned to l exist in the DataFrame."
l = ["var1","var4","var2"]

for x in l:
    print(x in df)

True
False
True


# **Loc & Iloc**

In [43]:
# This code generates a pandas DataFrame 'df' with 10 rows and 3 columns, filled with random integers from 1 to 29 inclusive, 
# and assigns the column names as "var1", "var2", and "var3".
m = np.random.randint(1,30, size =(10,3))
df = pd.DataFrame(m, columns = ["var1","var2","var3"])
df

Unnamed: 0,var1,var2,var3
0,7,12,26
1,9,18,18
2,25,18,14
3,9,22,6
4,23,13,17
5,8,1,18
6,4,10,12
7,2,16,14
8,28,24,24
9,24,23,15


In [48]:
# loc
# "It is used to make a selection in the way it is defined."
df.loc[0:3]

Unnamed: 0,var1,var2,var3
0,7,12,26
1,9,18,18
2,25,18,14
3,9,22,6


In [49]:
# iloc
# "We make selections using the indexing logic we are familiar with."
df.iloc[0:3]

Unnamed: 0,var1,var2,var3
0,7,12,26
1,9,18,18
2,25,18,14


# **Join**

In [51]:
# This code generates a pandas DataFrame 'df1' with 5 rows and 3 columns, filled with random integers from 1 to 29 inclusive, and assigns the column names 
# "var1", "var2", and "var3" to the respective columns.
m = np.random.randint(1,30, size = (5,3))
df1 = pd.DataFrame(m, columns = ["var1","var2","var3"])
df1

Unnamed: 0,var1,var2,var3
0,28,24,21
1,22,4,29
2,1,7,5
3,15,4,17
4,5,11,28


In [53]:
# df2 = df1 + 99: This line creates a new DataFrame 'df2' by adding 99 to each element of the original DataFrame 'df1'.
df2 = df1 + 99
df2

Unnamed: 0,var1,var2,var3
0,127,123,120
1,121,103,128
2,100,106,104
3,114,103,116
4,104,110,127


In [55]:
# This code concatenates two pandas DataFrames, df1 and df2, along the row axis (vertically), and the ignore_index=True parameter ensures that the resulting 
# DataFrame has a new index without considering the original indices from df1 and df2.
pd.concat([df1,df2], ignore_index = True)

Unnamed: 0,var1,var2,var3
0,28,24,21
1,22,4,29
2,1,7,5
3,15,4,17
4,5,11,28
5,127,123,120
6,121,103,128
7,100,106,104
8,114,103,116
9,104,110,127


# **Advanced Merging Operations**

In [61]:
# This code creates a pandas DataFrame called 'df1' with two columns: "Calisanlar" (Employees) and "Grup" (Group). The DataFrame contains data about employees and 
# their corresponding departments/groups. Each row represents an employee with their name and the department they belong to.
df1 = pd.DataFrame({
                   "Calisanlar": ["Ali","Veli","Ayse","Fatma"],
                   "Grup":["Muhasebe","Muhendislik","Muhendislik","İK"] 
                   })
df1

Unnamed: 0,Calisanlar,Grup
0,Ali,Muhasebe
1,Veli,Muhendislik
2,Ayse,Muhendislik
3,Fatma,İK


In [63]:
# This code creates a pandas DataFrame named 'df2' with two columns: "Calisanlar" containing names of employees and "ilkGiris" containing their 
# corresponding year of initial employment.
df2 = pd.DataFrame({"Calisanlar": ["Ayse","Ali","Veli","Fatma"],
                   "ilkGiris":[2010,2009,2014,2019]})
df2

Unnamed: 0,Calisanlar,ilkGiris
0,Ayse,2010
1,Ali,2009
2,Veli,2014
3,Fatma,2019


In [65]:
# pd.merge(df1, df2): This line performs a database-style merge operation on the two pandas DataFrames, df1 and df2, based on common columns between them. 
# The resulting DataFrame will contain rows that have matching values in the specified columns from both DataFrames.
pd.merge(df1,df2, on = "Calisanlar")

# or 
df3 = pd.merge(df1,df2)
df3

Unnamed: 0,Calisanlar,Grup,ilkGiris
0,Ali,Muhasebe,2009
1,Veli,Muhendislik,2014
2,Ayse,Muhendislik,2010
3,Fatma,İK,2019


In [68]:
# This code creates a pandas DataFrame named 'df4' with two columns, "Grup" and "Mudur", and three rows of data. The "Grup" column contains the values 
# "Muhasebe", "Muhendislik", and "İK", while the "Mudur" column contains the values "Caner", "Mustafa", and "Berkcan".
df4 = pd.DataFrame({"Grup":["Muhasebe","Muhendislik","İK"],
                   "Mudur":["Caner","Mustafa","Berkcan"]})
df4

Unnamed: 0,Grup,Mudur
0,Muhasebe,Caner
1,Muhendislik,Mustafa
2,İK,Berkcan


In [76]:
pd.merge(df3,df4)

Unnamed: 0,Calisanlar,Grup,ilkGiris,Mudur
0,Ali,Muhasebe,2009,Caner
1,Veli,Muhendislik,2014,Mustafa
2,Ayse,Muhendislik,2010,Mustafa
3,Fatma,İK,2019,Berkcan


In [73]:
# This code creates a pandas DataFrame named df5 with two columns: "Grup" and "Yetenekler". The DataFrame contains data about different groups 
# (e.g., Accounting, Engineering, HR) and their corresponding skills (e.g., Mathematics, Excel, Coding).
df5 = pd.DataFrame({
        "Grup":["Muhasebe","Muhasebe","Muhendıslık","Muhendıslık","İK","İK"],
        "Yetenekler":["Matematik","Excel","Kodlama","Linux","Excel","Yonetim"]
                   })
df5

Unnamed: 0,Grup,Yetenekler
0,Muhasebe,Matematik
1,Muhasebe,Excel
2,Muhendıslık,Kodlama
3,Muhendıslık,Linux
4,İK,Excel
5,İK,Yonetim


In [77]:
pd.merge(df1,df5)

Unnamed: 0,Calisanlar,Grup,Yetenekler
0,Ali,Muhasebe,Matematik
1,Ali,Muhasebe,Excel
2,Fatma,İK,Excel
3,Fatma,İK,Yonetim


# **Aggregation & Grouping**

### **Simple Aggregation Functions**

* **count** : The count method in pandas is used to count the number of non-null values in each column of a DataFrame or Series.

* **first** : The first method in pandas is used to get the first n rows of a DataFrame or Series.

* **last** : The pandas last function is not a built-in function in pandas as of my knowledge cutoff in September 2021. There is no official last function in pandas. If it has been introduced after my last update, I wouldn't be aware of its functionality.

* **mean** : The pandas mean function is used to calculate the average (mean) value of a numerical Series or DataFrame along the specified axis.

* **median** : The pandas median function is used to calculate the median value of a pandas Series or DataFrame, which represents the middle value of a sorted dataset.

* **min** : The pandas min method returns the minimum value in a pandas data structure, such as a Series or DataFrame.

* **max** : The pandas max function is used to find the maximum value in a pandas Series or DataFrame along a specified axis.

* **std** : The pandas std function is used to calculate the standard deviation of a pandas data structure, such as a Series or DataFrame. It measures the amount of variation or dispersion in the data.

* **var** : The var method in pandas is used to calculate the variance of a pandas DataFrame or Series, which measures the average squared deviation from the mean.

* **sum** : The pandas sum method is used to calculate the sum of values in a pandas data structure like Series or DataFrame.


In [79]:
# # Importing Seaborn
import seaborn as sns

In [81]:
# The code loads a dataset called "planets" using the Seaborn library and assigns it to a pandas DataFrame named "df".
df = sns.load_dataset("planets")
df

Unnamed: 0,method,number,orbital_period,mass,distance,year
0,Radial Velocity,1,269.300000,7.10,77.40,2006
1,Radial Velocity,1,874.774000,2.21,56.95,2008
2,Radial Velocity,1,763.000000,2.60,19.84,2011
3,Radial Velocity,1,326.030000,19.40,110.62,2007
4,Radial Velocity,1,516.220000,10.50,119.47,2009
...,...,...,...,...,...,...
1030,Transit,1,3.941507,,172.00,2006
1031,Transit,1,2.615864,,148.00,2007
1032,Transit,1,3.191524,,174.00,2007
1033,Transit,1,4.125083,,293.00,2008


In [82]:
df.shape

(1035, 6)

In [85]:
df["mass"].mean()

2.6381605847953216

In [86]:
df["mass"].count()

513

In [88]:
df["mass"].min()

0.0036

In [89]:
df["mass"].max()

25.0

In [90]:
df["mass"].sum()

1353.37638

In [91]:
df["mass"].std()

3.8186166509616046

In [92]:
df["mass"].var()

14.58183312700122

In [98]:
# The describe method in pandas is used to generate descriptive statistics of a DataFrame, providing information such as count, mean, standard deviation, 
# minimum, and quartile values for the numerical columns.
df.describe()

Unnamed: 0,number,orbital_period,mass,distance,year
count,1035.0,992.0,513.0,808.0,1035.0
mean,1.785507,2002.917596,2.638161,264.069282,2009.070531
std,1.240976,26014.728304,3.818617,733.116493,3.972567
min,1.0,0.090706,0.0036,1.35,1989.0
25%,1.0,5.44254,0.229,32.56,2007.0
50%,1.0,39.9795,1.26,55.25,2010.0
75%,2.0,526.005,3.04,178.5,2012.0
max,7.0,730000.0,25.0,8500.0,2014.0


In [100]:
# The code df.describe().T in pandas is used to generate the descriptive statistics of a DataFrame in a transposed format, 
# providing summary statistics for each column (feature) of the DataFrame.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
number,1035.0,1.785507,1.240976,1.0,1.0,1.0,2.0,7.0
orbital_period,992.0,2002.917596,26014.728304,0.090706,5.44254,39.9795,526.005,730000.0
mass,513.0,2.638161,3.818617,0.0036,0.229,1.26,3.04,25.0
distance,808.0,264.069282,733.116493,1.35,32.56,55.25,178.5,8500.0
year,1035.0,2009.070531,3.972567,1989.0,2007.0,2010.0,2012.0,2014.0


In [102]:
# It drops the rows with missing values (NaN) from the DataFrame 'df' and then computes descriptive statistics on the remaining data using the describe() method, 
# providing information like count, mean, standard deviation, minimum, quartiles, and maximum for each column.
df.dropna().describe()

Unnamed: 0,number,orbital_period,mass,distance,year
count,498.0,498.0,498.0,498.0,498.0
mean,1.73494,835.778671,2.50932,52.068213,2007.37751
std,1.17572,1469.128259,3.636274,46.596041,4.167284
min,1.0,1.3283,0.0036,1.35,1989.0
25%,1.0,38.27225,0.2125,24.4975,2005.0
50%,1.0,357.0,1.245,39.94,2009.0
75%,2.0,999.6,2.8675,59.3325,2011.0
max,6.0,17337.5,25.0,354.0,2014.0


# **Grouping Operations**

In [104]:
#This code creates a pandas DataFrame named 'df' with two columns: 'gruplar' and 'veri'. The 'gruplar' column contains categorical data 
# ("a", "b", and "c") representing different groups, and the 'veri' column contains corresponding numerical data (10, 11, 52, 23, 43, and 55) associated with each group.
df = pd.DataFrame({"gruplar": ["a","b","c","a","b","c"],
                  "veri":[10,11,52,23,43,55]}, columns = ["gruplar","veri"])
df

Unnamed: 0,gruplar,veri
0,a,10
1,b,11
2,c,52
3,a,23
4,b,43
5,c,55


In [107]:
# The code df.groupby("groups").mean() calculates the mean (average) value of each group in the DataFrame 'df' based on the values in the "groups" column. 
# It groups the data by the unique values in the "groups" column and then computes the mean value for each group, resulting in a new DataFrame.
df.groupby("gruplar").mean()

#Then, the sum method is applied to each group to calculate the sum of values for each group separately, resulting in a new DataFrame.
df.groupby("gruplar").sum()


Unnamed: 0_level_0,veri
gruplar,Unnamed: 1_level_1
a,33
b,54
c,107


# **Aggregate & Filter & Transfrom & Apply**

In [109]:
# The code creates a pandas DataFrame named 'df' with three columns: "gruplar" containing categorical data (group names 'A', 'B', 'C'), "degisken1" 
# containing numeric data ([10, 23, 33, 22, 11, 99]), and "degisken2" containing another set of numeric data ([100, 253, 333, 262, 111, 969]). 
# The DataFrame is constructed using a Python dictionary, and the column names are specified using the 'columns' parameter.
df = pd.DataFrame({"gruplar"   : ["A","B","C","A","B","C"],
                   "degisken1" : [10,23,33,22,11,99],
                   "degisken2" : [100,253,333,262,111,969]},
                   columns = ["gruplar","degisken1","degisken2"])
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,23,253
2,C,33,333
3,A,22,262
4,B,11,111
5,C,99,969


### **Aggregate**

In [114]:
# This code groups the DataFrame 'df' by the "gruplar" column and then applies three aggregation functions to each group: minimum value, median, and maximum value. 
# The result will be a new DataFrame that shows the minimum, median, and maximum values for each group in the "gruplar" column.
df.groupby("gruplar").aggregate(["min", np.median, max])

Unnamed: 0_level_0,degisken1,degisken1,degisken1,degisken2,degisken2,degisken2
Unnamed: 0_level_1,min,median,max,min,median,max
gruplar,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
A,10,16.0,22,100,181.0,262
B,11,17.0,23,111,182.0,253
C,33,66.0,99,333,651.0,969


In [113]:
# This code groups the DataFrame 'df' by the column "gruplar" and then applies aggregation functions to the columns "degisken1" and "degisken2". 
# It calculates the minimum value for "degisken1" and the maximum value for "degisken2" for each group.
df.groupby("gruplar").aggregate({"degisken1":"min",
                                 "degisken2":"max"})

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,262
B,11,253
C,33,969


### **Filter**

In [116]:
df = pd.DataFrame({"gruplar"   : ["A","B","C","A","B","C"],
                   "degisken1" : [10,23,33,22,11,99],
                   "degisken2" : [100,253,333,262,111,969]},
                   columns = ["gruplar","degisken1","degisken2"])
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,23,253
2,C,33,333
3,A,22,262
4,B,11,111
5,C,99,969


In [146]:
# This code defines a filtering function named filter_func that takes a DataFrame 'x' as input and returns True if the standard deviation of the column 'degisken1' 
# is greater than 9, otherwise returns False.
def filter_func(x):
    return x["degisken1"].std() > 9

In [121]:
# df.groupby("groups").filter(filter_func): This line performs grouping on the DataFrame 'df' based on the values in the "groups" column and applies the filtering 
# function 'filter_func' to each group. The result will be a new DataFrame containing only the rows that satisfy the conditions specified in the 'filter_func' for each group.
df.groupby("gruplar").filter(filter_func)

Unnamed: 0,gruplar,degisken1,degisken2
2,C,33,333
5,C,99,969


### **Transform**

In [122]:
df = pd.DataFrame({"gruplar"   : ["A","B","C","A","B","C"],
                   "degisken1" : [10,23,33,22,11,99],
                   "degisken2" : [100,253,333,262,111,969]},
                   columns = ["gruplar","degisken1","degisken2"])
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,23,253
2,C,33,333
3,A,22,262
4,B,11,111
5,C,99,969


In [123]:
# df_a = df.iloc[:, 1:3]: This line creates a new DataFrame 'df_a' by selecting the columns from index 1 to 2 (exclusive) from the original DataFrame 'df'.
df_a = df.iloc[:,1:3]

In [124]:
# This code applies a transformation to each column of the DataFrame 'df_a' using the lambda function. The transformation standardizes each column by subtracting the 
# column mean and dividing by the column standard deviation.
df_a.transform(lambda x: (x-x.mean()) / x.std())

Unnamed: 0,degisken1,degisken2
0,-0.687871,-0.738461
1,-0.299074,-0.263736
2,0.0,-0.015514
3,-0.328982,-0.235811
4,-0.657963,-0.704331
5,1.97389,1.957853


### **Apply**

In [125]:
df = pd.DataFrame({"gruplar"   : ["A","B","C","A","B","C"],
                   "degisken1" : [10,23,33,22,11,99],
                   "degisken2" : [100,253,333,262,111,969]},
                   columns = ["gruplar","degisken1","degisken2"])
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,23,253
2,C,33,333
3,A,22,262
4,B,11,111
5,C,99,969


In [127]:
# The code df.apply(np.sum) applies the NumPy sum function to each column of the pandas DataFrame 'df', resulting in a new pandas Series containing 
# the sum of values for each column.
df.apply(np.sum)

gruplar      ABCABC
degisken1       198
degisken2      2028
dtype: object

In [128]:
# This code groups the DataFrame 'df' by the values in the "gruplar" column and then applies the numpy sum function to each group, resulting in a new DataFrame.
df.groupby("gruplar").apply(np.sum)

Unnamed: 0_level_0,gruplar,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,AA,32,362
B,BB,34,364
C,CC,132,1302


# **Pivot Tables**

In [133]:
# This code loads the 'titanic' dataset from the seaborn library and assigns it to the variable 'titanic'.
titanic = sns.load_dataset('titanic')

# This code displays the first few rows of the 'titanic' DataFrame, providing a preview of its contents.
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [135]:
# This code groups the "titanic" DataFrame by the "sex" column, selects the "survived" column, and then calculates the mean of the selected column for each group.
titanic.groupby("sex")[["survived"]].mean()

Unnamed: 0_level_0,survived
sex,Unnamed: 1_level_1
female,0.742038
male,0.188908


In [138]:
# This code creates a pivot table from the "titanic" DataFrame, with "survived" as the values, "sex" as the index, and "class" as the columns.
titanic.pivot_table("survived", index = "sex", columns = "class")

class,First,Second,Third
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,0.968085,0.921053,0.5
male,0.368852,0.157407,0.135447


In [140]:
# This code creates a new pandas Series named 'age' by cutting the 'age' column in the 'titanic' DataFrame into bins [0, 18] and (18, 90]. 
# It then displays the first 10 elements of the new 'age' Series.
age = pd.cut(titanic["age"], [0,18,90])
age.head(10)

0    (18.0, 90.0]
1    (18.0, 90.0]
2    (18.0, 90.0]
3    (18.0, 90.0]
4    (18.0, 90.0]
5             NaN
6    (18.0, 90.0]
7     (0.0, 18.0]
8    (18.0, 90.0]
9     (0.0, 18.0]
Name: age, dtype: category
Categories (2, interval[int64, right]): [(0, 18] < (18, 90]]

In [141]:
# This code creates a pivot table from the "titanic" DataFrame to analyze the "survived" column based on the "sex" and "age" columns,
# with "class" as the columns in the pivot table.
titanic.pivot_table("survived", ["sex",age], "class" )

Unnamed: 0_level_0,class,First,Second,Third
sex,age,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
female,"(0, 18]",0.909091,1.0,0.511628
female,"(18, 90]",0.972973,0.9,0.423729
male,"(0, 18]",0.8,0.6,0.215686
male,"(18, 90]",0.375,0.071429,0.133663


# **Reading Outsourced Data**

In [142]:
# csv reading
pd.read_csv("reading_data/ornekcsv.csv", sep = ";")

Unnamed: 0,a,b,c
0,78,12,1.0
1,78,12,2.0
2,78,324,3.0
3,7,2,4.0
4,88,23,5.0
5,6,2,
6,56,11,6.0
7,7,12,7.0
8,56,21,7.0
9,346,2,8.0


In [143]:
# txt reading
pd.read_csv("reading_data/duz_metin.txt")

Unnamed: 0,1 2
0,2 2
1,3 2
2,4 2
3,5 2
4,6 2
5,7 2
6,8 2
7,9 2
8,10 2


In [144]:
# excel reading
pd.read_excel("reading_data/ornekx.xlsx")

Unnamed: 0,a,b,c
0,78,12,1.0
1,78,12,2.0
2,78,324,3.0
3,7,2,4.0
4,88,23,5.0
5,6,2,
6,56,11,6.0
7,7,12,7.0
8,56,21,7.0
9,346,2,8.0
