### The DataFrame is the most commonly used object in pandas. It is a table-like data structure made of rows and columns, similar to an Excel spreadsheet.

In [244]:
import pandas as pd 
from pandas import DataFrame 

# Six days of toy weather observations, stored column by column.
weather_data = dict(
    day=['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017'],
    temperature=[32, 35, 28, 24, 32, 31],
    windspeed=[6, 7, 2, 7, 4, 2],
    event=['Rain', 'Sunny', 'Snow', 'Snow', 'Sunny', 'Sunny'],
)

# Each dictionary key becomes a column of the frame.
df = DataFrame(data=weather_data)
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny
5,1/6/2017,31,2,Sunny


In [7]:
df.shape # (rows, columns) tuple; this frame has 6 rows and 4 columns

(6, 4)

In [8]:
# Positional slice: rows at positions 2, 3 and 4 (the 3rd through the 5th row);
# the stop position 5 is excluded.
newdf = df.iloc[2:5]
newdf

Unnamed: 0,day,temperature,windspeed,event
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny


### Rows

In [12]:
df.head(n=4)  # the first four rows of the frame

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow


In [15]:
df.tail(n=2)  # the last two rows of the frame

Unnamed: 0,day,temperature,windspeed,event
4,1/5/2017,32,4,Sunny
5,1/6/2017,31,2,Sunny


In [17]:
# Filter the row index with a boolean mask on the date, then take the first hit.
matches = df.index[df['day'] == "1/4/2017"]
matches[0]

3

In [22]:
# Look up the row label for the date and cast it to a built-in int.
date_mask = df['day'] == '1/4/2017'
start_index = int(df.index[date_mask][0])
print(start_index, type(start_index), sep='\n')

3
<class 'int'>


In [23]:
# Three consecutive rows, beginning at the row found for the chosen date.
df.iloc[start_index:start_index + 3]

Unnamed: 0,day,temperature,windspeed,event
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny
5,1/6/2017,31,2,Sunny


### Columns

In [29]:
# The column labels are held in an Index object; it can be indexed by position
# and converted to a plain Python list.
column_index = df.columns
print(column_index)
print(column_index[0])
columns_name = list(column_index)
print(columns_name)

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')
day
['day', 'temperature', 'windspeed', 'event']


In [31]:
df['event']  # a single column comes back as a Series

0     Rain
1    Sunny
2     Snow
3     Snow
4    Sunny
5    Sunny
Name: event, dtype: object

In [33]:
# Convert the event column into an ordinary Python list.
event_list = df['event'].tolist()
print(event_list)

['Rain', 'Sunny', 'Snow', 'Snow', 'Sunny', 'Sunny']


In [35]:
# Keep only the 'day' and 'event' columns in a separate data frame.
wanted_columns = ['day', 'event']
twodf = df.loc[:, wanted_columns]
twodf

Unnamed: 0,day,event
0,1/1/2017,Rain
1,1/2/2017,Sunny
2,1/3/2017,Snow
3,1/4/2017,Snow
4,1/5/2017,Sunny
5,1/6/2017,Sunny


In [47]:
# All rows, restricted to the 'day' and 'windspeed' columns.
df.loc[:, ['day', 'windspeed']]

Unnamed: 0,day,windspeed
0,1/1/2017,6
1,1/2/2017,7
2,1/3/2017,2
3,1/4/2017,7
4,1/5/2017,4
5,1/6/2017,2


### Operations On Data Frames

In [51]:
# Summary statistics of the temperature column.
temperature = df['temperature']
print('The mean temperature is:', temperature.mean())
print('The standard deviation is:', temperature.std())



The mean temperature is: 30.333333333333332
The standard deviation is: 3.8297084310253524


In [64]:
# Another route to the same statistics: describe() summarises the numeric
# columns, with one row per statistic ('mean', 'std', ...).
new_df = df.describe()
temperature_stats = new_df['temperature']
print('The mean temperature is:', temperature_stats['mean'])
print('The standard deviation is:', temperature_stats['std'])

The mean temperature is: 30.333333333333332
The standard deviation is: 3.8297084310253524


In [67]:
# Boolean mask of the days warmer than 30, then count the rows it keeps.
is_hot = df['temperature'] > 30
count = len(df[is_hot])
print('we have', count, 'days whose temperature is greater than 30')

we have 4 days whose temperature is greater than 30


In [71]:
# Day(s) on which the lowest temperature was recorded.
coldest = df["temperature"].min()
min_temperature_day = df.loc[df['temperature'] == coldest, 'day']
print(min_temperature_day)

3    1/4/2017
Name: day, dtype: object


In [73]:
df['event'].min() # for strings, min() returns the alphabetically first value, not the least frequent one
# .mean() won't work because the event column holds strings, not numbers

'Rain'

In [93]:
# value_counts() reports how many times each distinct value occurs in a column.
event_counts = df['event'].value_counts()
temperature_counts = df['temperature'].value_counts()
print(event_counts, "\n")
print(temperature_counts)

Sunny    3
Snow     2
Rain     1
Name: event, dtype: int64 

32    2
35    1
28    1
24    1
31    1
Name: temperature, dtype: int64


In [84]:
print(event_list)

# value_counts() returns a Series labelled by event name, so the second entry
# has to be read by position with .iloc. A bare [1] relies on the deprecated
# integer fallback for label-indexed Series.
event_counts = df.event.value_counts()
print(event_counts.iloc[1])
# prints how many times the second most frequent event occurs

print(len(df.event))
# prints the total number of entries in the event column

['Rain', 'Sunny', 'Snow', 'Snow', 'Sunny', 'Sunny']
2
6


In [116]:
# Number of distinct values held in a column.
print(df['event'].nunique())  # 6 entries, 3 distinct values
print(df['day'].nunique())    # 6 entries, 6 distinct values

3
6


In [97]:
# The counts are labelled by event name, so positional access needs .iloc;
# a bare [0] relies on the deprecated integer fallback for label-indexed Series.
event_counts = df.event.value_counts()
print(event_counts.iloc[0]) # --> number of occurrences of the most frequent event
event_counts.index[0] # --> name of the most frequent event

3


'Sunny'

In [100]:
# Compute the counts once; read the top label from the index and its count by
# position (.iloc), because the Series is labelled by event name, not integers.
event_counts = df.event.value_counts()
print(f"Max event: {event_counts.index[0]} and it occured {event_counts.iloc[0]}")

Max event: Sunny and it occured 3


### Pandas Series Operations:
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.html

### Indexing 

In [224]:
df # show the whole frame; the rows still carry the default 0..5 integer labels

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny
5,1/6/2017,31,2,Sunny


In [225]:
# df["1/1/2017" : "1/4/2017"] --> raises an error at this point, because the
# rows are labelled by the default integers rather than by the day strings.
# To slice rows by day, make the 'day' column the index of the frame:
df.set_index('day', inplace= True)
df
# notice that the 'day' header now sits one row lower, as the name of the index


Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Sunny
1/6/2017,31,2,Sunny


In [226]:
# With 'day' as the index, rows can be selected by a label range; the end
# label ("1/4/2017") is included in the result.
print(df.loc["1/1/2017":"1/4/2017"])

          temperature  windspeed  event
day                                    
1/1/2017           32          6   Rain
1/2/2017           35          7  Sunny
1/3/2017           28          2   Snow
1/4/2017           24          7   Snow


In [227]:
# restore the default integer index; 'day' goes back to being an ordinary column
df.reset_index(inplace = True)
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny
5,1/6/2017,31,2,Sunny


In [228]:
# Work on an independent copy. A plain `new_df = df` only creates a second name
# for the same object, so the in-place re-indexing of new_df would also
# re-index df and break the later cells that expect df to keep its integer index.
new_df = df.copy()
new_df.set_index('event', inplace=True)


In [230]:
new_df # 'event' is now the index, so it is shown as the left-most column

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,1/1/2017,32,6
Sunny,1/2/2017,35,7
Snow,1/3/2017,28,2
Snow,1/4/2017,24,7
Sunny,1/5/2017,32,4
Sunny,1/6/2017,31,2


In [231]:
new_df.reset_index(inplace=True)# move 'event' back into the columns first; otherwise the next set_index would discard it
new_df.set_index('temperature', inplace=True) # now label the rows by temperature instead
new_df 

Unnamed: 0_level_0,event,day,windspeed
temperature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
32,Rain,1/1/2017,6
35,Sunny,1/2/2017,7
28,Snow,1/3/2017,2
24,Snow,1/4/2017,7
32,Sunny,1/5/2017,4
31,Sunny,1/6/2017,2


In [232]:
new_df # unchanged since the previous cell: still indexed by temperature

Unnamed: 0_level_0,event,day,windspeed
temperature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
32,Rain,1/1/2017,6
35,Sunny,1/2/2017,7
28,Snow,1/3/2017,2
24,Snow,1/4/2017,7
32,Sunny,1/5/2017,4
31,Sunny,1/6/2017,2


In [233]:
new_df.reset_index(inplace=True) # temperature becomes an ordinary column again

In [234]:
new_df # back on the default integer index, with temperature as the first column

Unnamed: 0,temperature,event,day,windspeed
0,32,Rain,1/1/2017,6
1,35,Sunny,1/2/2017,7
2,28,Snow,1/3/2017,2
3,24,Snow,1/4/2017,7
4,32,Sunny,1/5/2017,4
5,31,Sunny,1/6/2017,2


In [235]:
# label the rows by event once more, ready for the label lookup in the next cell
new_df.set_index('event',inplace=True)
new_df

Unnamed: 0_level_0,temperature,day,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,32,1/1/2017,6
Sunny,35,1/2/2017,7
Snow,28,1/3/2017,2
Snow,24,1/4/2017,7
Sunny,32,1/5/2017,4
Sunny,31,1/6/2017,2


In [236]:
new_df.loc['Sunny'] # .loc selects rows by index label; with 'event' as the index
                    # it returns every row whose label is 'Sunny'

Unnamed: 0_level_0,temperature,day,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sunny,35,1/2/2017,7
Sunny,32,1/5/2017,4
Sunny,31,1/6/2017,2


In [245]:
# Add a column holding the row numbers 0 .. len(df)-1. The built-in range() is
# used because NumPy is never imported in this notebook (np.arange raised a
# NameError on a fresh kernel), and len(df) avoids hard-coding the row count.
df["new data"] = range(len(df))
# reset_index keeps the previous index as a regular column; an unnamed index
# is stored under the column name 'index'.
df.reset_index(inplace=True)
df

Unnamed: 0,index,day,temperature,windspeed,event,new data
0,0,1/1/2017,32,6,Rain,0
1,1,1/2/2017,35,7,Sunny,1
2,2,1/3/2017,28,2,Snow,2
3,3,1/4/2017,24,7,Snow,3
4,4,1/5/2017,32,4,Sunny,4
5,5,1/6/2017,31,2,Sunny,5


In [246]:
# Remove the columns that are no longer needed, one after the other.
for unwanted in ('event', 'index'):
    df.drop(columns=unwanted, inplace=True)
df

Unnamed: 0,day,temperature,windspeed,new data
0,1/1/2017,32,6,0
1,1/2/2017,35,7,1
2,1/3/2017,28,2,2
3,1/4/2017,24,7,3
4,1/5/2017,32,4,4
5,1/6/2017,31,2,5


In [247]:
df.index # the row labels; here the default RangeIndex counting up from 0

RangeIndex(start=0, stop=6, step=1)

In [248]:
df.set_index('temperature', inplace= True) # use the temperature values as row labels
df.loc[32] # the label 32 occurs twice, so both matching rows are returned

Unnamed: 0_level_0,day,windspeed,new data
temperature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
32,1/1/2017,6,0
32,1/5/2017,4,4


In [249]:
df.head() # without an argument, head() shows the first 5 rows

Unnamed: 0_level_0,day,windspeed,new data
temperature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
32,1/1/2017,6,0
35,1/2/2017,7,1
28,1/3/2017,2,2
24,1/4/2017,7,3
32,1/5/2017,4,4


### Creating A Hand-Made Data Frame

In [1]:
import pandas as pd 
from pandas import DataFrame 

def _ask_count(prompt):
    """Prompt repeatedly until the user enters a whole number >= 0, then return it."""
    while True:
        answer = input(prompt).strip()
        try:
            count = int(answer)
        except ValueError:
            count = -1  # not a number at all: treat like any other invalid answer
        if count >= 0:
            return count
        print('please enter a whole number (0 or more)')


columns = []
data = dict()

num = _ask_count('please enter the number of columns: ')
while len(columns) < num:
    # Strip stray whitespace so a name typed as "Name " is stored as "Name".
    name = input('please enter the column name: ').strip()
    if not name or name in data:
        # An empty name is unusable, and a repeated name would silently merge
        # two requested columns into a single dictionary key.
        print('column names must be non-empty and unique')
        continue
    columns.append(name)
    data[name] = []

rows = _ask_count('please enter the number of rows: ')
for _ in range(rows):
    # Ask for one value per column, in the order the columns were entered.
    for i in columns:
        value = input(f"please enter the value of {i}: ").strip()
        data[i].append(value)  # values are kept as the text the user typed

# Passing `columns` keeps the entered column order explicit.
data_frame = DataFrame(data, columns=columns)
data_frame

please enter the number of columns: 2
please enter the column name: Name 
please enter the column name: Age
please enter the number of rows: 3
please enter the value of Name : Mohamed
please enter the value of Age: 20
please enter the value of Name : Ali
please enter the value of Age: 30
please enter the value of Name : Ahmed
please enter the value of Age: 40


Unnamed: 0,Name,Age
0,Mohamed,20
1,Ali,30
2,Ahmed,40
