### Pandas: the Python Data Analysis Library

### The library includes two data structures: the Series, which is 1D, and the DataFrame, which is 2D. 
### While the DataFrame is the primary pandas data structure, a DataFrame is actually a collection of Series objects. 
### Therefore, it’s important to understand Series as well as DataFrames.

In [13]:
import pandas as pd

In [14]:
# Build a one-dimensional Series from a plain Python list;
# no index is given, so the rows are labelled 0, 1, 2, ...
data = ["Bob", "Jack", "Amy"]
emps_names = pd.Series(data=data)
print(emps_names)

0     Bob
1    Jack
2     Amy
dtype: object


In [28]:
# A Series can carry user-defined labels; like dictionary keys,
# they may be strings or integers.
custom_labels = [100, 200, "abc"]
emps_names1 = pd.Series(data, index=custom_labels)
print(emps_names1)

100     Bob
200    Jack
abc     Amy
dtype: object


Accessing Data in Series

In [22]:
# Plain [] indexing with the user-defined labels
for label in ("abc", 200):
    print(emps_names1[label])

Amy
Jack


In [23]:
# .loc[] is another way to retrieve values through the user-defined labels
for label in ("abc", 200):
    print(emps_names1.loc[label])

Amy
Jack


In [24]:
# .iloc[] indexes by integer position, the same way a list is indexed
for position in (2, 1):
    print(emps_names1.iloc[position])

Amy
Jack


In [30]:
# Slicing works as well; with .loc[] the slice is expressed in labels
label_window = emps_names1.loc[100:200]
print(label_window)

100     Bob
200    Jack
dtype: object


In [34]:
first_two = emps_names1.iloc[0:2]
print(first_two)

# a bare integer slice with [] gives the same rows
print(emps_names1[slice(0, 2)])

100     Bob
200    Jack
dtype: object
100     Bob
200    Jack
dtype: object


In [50]:
# Combine two Series into a DataFrame.
import random

# A dedicated, seeded generator makes the sample addresses identical on every
# "Restart Kernel -> Run All" and leaves the global random state untouched.
rng = random.Random(42)

email = ["gmail.com", "yahoo.com"]
# Attach a randomly chosen mail domain to each user name.
data = [f"{user}@{rng.choice(email)}" for user in ['bob.lee', 'jack.loh', 'amy.tan']]
print(data)
print()

# Reuse the labels of emps_names1 instead of repeating them by hand, so both
# Series are guaranteed to line up row by row when they are concatenated.
emps_emails = pd.Series(data, index=emps_names1.index, name="emails")
# The Series names become the column names of the resulting DataFrame.
emps_names1.name = "names"
df = pd.concat([emps_names1, emps_emails], axis=1)
print(df)


['bob.lee@yahoo.com', 'jack.loh@gmail.com', 'amy.tan@yahoo.com']

    names              emails
100   Bob   bob.lee@yahoo.com
200  Jack  jack.loh@gmail.com
abc   Amy   amy.tan@yahoo.com


In [63]:
# A DataFrame index can be of any type.
# Besides being built by hand, a DataFrame can be loaded from a database, a CSV file,
# an API request or another external source through one of the pandas reader methods
# (JSON and Excel can be read into a DataFrame as well).

import yfinance as yf

tkr = yf.Ticker('TSLA')
# history() already hands the prices back as a DataFrame
hist = (
    tkr.history(period="5d")
    .drop(columns=["Dividends", "Stock Splits"])
    .reset_index()  # turn the existing index into a column and number the rows
)
print(hist)


                       Date        Open        High         Low       Close  \
0 2023-07-10 00:00:00-04:00  276.470001  277.519989  265.100006  269.609985   
1 2023-07-11 00:00:00-04:00  268.649994  270.899994  266.369995  269.790009   
2 2023-07-12 00:00:00-04:00  276.329987  276.519989  271.459991  271.989990   
3 2023-07-13 00:00:00-04:00  274.589996  279.450012  270.600006  277.899994   
4 2023-07-14 00:00:00-04:00  277.010010  285.299988  276.309998  281.380005   

      Volume  
0  119425400  
1   91972400  
2   95672100  
3  112681500  
4  119771100  


In [64]:
# Use the Date column as the index instead of the row numbers
hist = hist.set_index(keys="Date")
print(hist)

                                 Open        High         Low       Close  \
Date                                                                        
2023-07-10 00:00:00-04:00  276.470001  277.519989  265.100006  269.609985   
2023-07-11 00:00:00-04:00  268.649994  270.899994  266.369995  269.790009   
2023-07-12 00:00:00-04:00  276.329987  276.519989  271.459991  271.989990   
2023-07-13 00:00:00-04:00  274.589996  279.450012  270.600006  277.899994   
2023-07-14 00:00:00-04:00  277.010010  285.299988  276.309998  281.380005   

                              Volume  
Date                                  
2023-07-10 00:00:00-04:00  119425400  
2023-07-11 00:00:00-04:00   91972400  
2023-07-12 00:00:00-04:00   95672100  
2023-07-13 00:00:00-04:00  112681500  
2023-07-14 00:00:00-04:00  119771100  


In [67]:
# How to load JSON into a pandas DataFrame
import json
from io import StringIO
import pandas as pd
data = [ 
    {"Empno":9001,"Salary":3000}, 
    {"Empno":9002,"Salary":2800}, 
    {"Empno":9003,"Salary":2500}
    ]

# Serialise the list of records to a JSON string; the path or URL of an
# actual JSON document can be passed to read_json instead.
json_data = json.dumps(data)
# Literal JSON text must be wrapped in a file-like object: handing a raw JSON
# string to read_json is deprecated since pandas 2.1.
salary = pd.read_json(StringIO(json_data))
salary = salary.set_index("Empno")
print(salary)

       Salary
Empno        
9001     3000
9002     2800
9003     2500


In [70]:
# Build a DataFrame from a list of rows (a 2D, list-of-lists layout)

import pandas as pd

data = [
    ['9001', 'Jeff Russell', 'sales'],
    ['9002', 'Jane Boorman', 'sales'],
    ['9003', 'Tom Heints', 'sales'],
]
# Target type of every column; the keys double as the column names.
column_types = {'Empno': int, 'Name': str, 'Job': str}
emps = (
    pd.DataFrame(data, columns=list(column_types))
    .astype(column_types)   # the employee numbers arrive as strings
    .set_index('Empno')
)
print(emps)


               Name    Job
Empno                     
9001   Jeff Russell  sales
9002   Jane Boorman  sales
9003     Tom Heints  sales
