# Getting started
To use pandas, you'll typically start with the following line of code.

In [115]:
#pip install numpy
#pip install pandas
import pandas as pd
import numpy as np

## 1. Creating data
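There are two core objects in pandas: the <font color='red'><b> DataFrame </b></font> and the <font color='red'><b> Series </b></font>.

### 1.1. DataFrame
A DataFrame is a table: rows and columns of values. It can be built from several ordinary Python structures, as the following cells show.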

In [None]:
# Build a DataFrame from a list of lists.
# Every inner list becomes one row. No labels are supplied, so both the
# rows and the columns receive default integer labels (0, 1, 2, ...).
data = [
    ['Alice', 25, 'New York'],
    ['Bob', 30, 'Paris'],
    ['Charlie', 35, 'London'],
]

df = pd.DataFrame(data=data)
df

In [None]:
# Build a DataFrame from a list of lists, this time naming the columns.
# Every inner list is still one row; `columns` supplies the column labels,
# while the rows keep the default integer index.
data = [
    ['Alice', 25, 'New York'],
    ['Bob', 30, 'Paris'],
    ['Charlie', 35, 'London'],
]
df = pd.DataFrame(data=data, columns=['name', 'age', 'city'])
df

In [None]:
# List of lists with explicit labels on both axes:
# `columns` names the columns and `index` names the rows.
data = [
    ['Alice', 25, 'New York'],
    ['Bob', 30, 'Paris'],
    ['Charlie', 35, 'London'],
]

df = pd.DataFrame(
    data=data,
    index=['Student1', 'Student2', 'Student3'],
    columns=['name', 'age', 'city'],
)
df

In [None]:
# Build a DataFrame from a dictionary of lists.
# Each key becomes a column label and its list supplies that column's values;
# the rows get the default integer index.
data = {
    'name': ['Alice', 'Bob', 'Charlie'],
    'age': [25, 30, 35],
    'city': ['New York', 'Paris', 'London'],
}

df = pd.DataFrame(data=data)
df

In [None]:
# Dictionary of lists again, now with custom row labels.
# Keys still become columns; `index` replaces the default 0, 1, 2 row labels.
data = {
    'name': ['Alice', 'Bob', 'Charlie'],
    'age': [25, 30, 35],
    'city': ['New York', 'Paris', 'London'],
}

df = pd.DataFrame(data=data, index=['one', 'two', 'three'])
df

In [None]:
# Build a DataFrame from a list of dictionaries.
# Each dictionary is one record (row); its keys select the columns.
data = [
    {'name': 'Alice', 'age': 25, 'city': 'New York'},
    {'name': 'Bob', 'age': 30, 'city': 'Paris'},
    {'name': 'Charlie', 'age': 35, 'city': 'London'},
]
df = pd.DataFrame(data=data)
df

In [None]:
# List of dictionaries whose keys do not all match.
# The third record uses 'Name' (capital N) instead of 'name'. Keys are matched
# exactly, so pandas creates a separate 'Name' column and fills NaN wherever a
# record has no value for a column.
# NOTE(review): the mismatch looks intentional (the cell is otherwise a copy of
# the list-of-dictionaries example with matching keys) - confirm.
data = [
    {'name': 'Alice', 'age': 25, 'city': 'New York'},
    {'name': 'Bob', 'age': 30, 'city': 'Paris'},
    {'Name': 'Charlie', 'age': 35, 'city': 'London'},
]

df = pd.DataFrame(data=data)
df

In [None]:
# Build a DataFrame from a list of tuples.
# Each tuple is one row; `columns` provides the column labels.
data = [
    ('Alice', 25, 'New York'),
    ('Bob', 30, 'Paris'),
    ('Charlie', 35, 'London'),
]

df = pd.DataFrame(data=data, columns=['name', 'age', 'city'])
df

### 1.2. Series
A Series, by contrast, is a sequence of data values. If a DataFrame is a table, a Series is a list. And in fact you can create one with nothing more than a list:

In [None]:
# A Series built from a plain list; `name` labels the Series as a whole.
pd.Series([1, 2, 3, 4, 5] , name='Test1')

* The Series and the DataFrame are intimately related. It's helpful to think of a DataFrame as actually being just a bunch of Series <font color='red'><b> glued together </b></font>.

In [None]:
# Two named Series placed side by side (axis=1) form a two-column DataFrame;
# each Series name becomes the label of its column.
one = pd.Series(data=[1, 2, 3, 4, 5], name='Test1')
two = pd.Series(data=[6, 7, 8, 9, 10], name='Test2')
test_table = pd.concat(objs=[one, two], axis=1)
test_table

## 2. Reading data files

In [None]:
# Load the CSV file into a DataFrame. The path is relative, so the file must
# be in the notebook's working directory.
my_table = pd.read_csv('salesmonthly.csv')
my_table

### 2.1. Useful attributes and methods

In [None]:
# Dimensions of the table as a tuple: (number of rows, number of columns).
my_table.shape

In [None]:
# First rows of the table (5 when no argument is given).
my_table.head()

In [None]:
# First 3 rows.
my_table.head(3)

In [None]:
# Last 3 rows.
my_table.tail(3)

In [None]:
# Prints a concise summary: column names, non-null counts, dtypes and memory usage.
my_table.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max).
# With default arguments only the numeric columns are summarised when the
# frame contains any.
my_table.describe()

In [None]:
# include='all' summarises every column, numeric or not; statistics that do
# not apply to a column's type are shown as NaN.
my_table.describe(include = 'all')

### 2.2. Accessing DataFrame

In [None]:
# Re-read the CSV - presumably so this section starts from an unmodified copy
# of the data (TODO confirm; the file was already loaded earlier).
my_table = pd.read_csv('salesmonthly.csv')
my_table.head(3)

In [None]:
# Bracket notation, df['column']: returns the column as a Series.
my_table['product1']

In [None]:
# Attribute notation, df.columnname: returns the same Series as df['columnname'].
# Only usable when the column name is a valid Python identifier.
my_table.product1

In [None]:
# Single brackets, my_table['product1'], return a Series;
# double brackets (a list of column names) return a one-column DataFrame.
my_table[['product1']]

In [None]:
# Attribute notation cannot be used for this column: `my_table.product 8`
# is a SyntaxError because the name contains a space and is therefore not a
# valid Python identifier. Bracket notation works for any column name.
my_table['product 8']

In [None]:
# Column names that are not valid Python identifiers (here: a name containing
# a space) must be selected with bracket notation. Double brackets return a
# one-column DataFrame.
# NOTE(review): the earlier comment said the column name was in Farsi, but the
# name used here is 'product 8' - confirm against the CSV header.
my_table[['product 8']]

In [None]:
# Select the column, then look up index label 0 in the resulting Series
# (my_table has the default integer index, so this is the first row's value).
my_table['product 8'][0]

In [None]:
# Same lookup via attribute notation: the product1 value at index label 0.
my_table.product1[0]

In [None]:
# Selecting several columns needs a list of names (double brackets).
# my_table['product1','product2'] would be read as one tuple key and raise KeyError.
my_table[['product1','product2']]

In [None]:
# Summary statistics for a single column.
my_table.product1.describe()

In [None]:
# Arithmetic mean of the column.
my_table.product1.mean()

In [None]:
# Largest value in the column.
my_table.product1.max()

In [None]:
# Integer position (not the index label) of the largest value in the column.
my_table.product1.argmax()

In [None]:
# Array of the distinct values in the Customer column, in order of appearance.
my_table.Customer.unique()

In [None]:
# Number of distinct values (missing values are not counted by default).
my_table.Customer.nunique()

In [None]:
# How many times each distinct value occurs, most frequent first.
my_table.Customer.value_counts()

In [None]:
# normalize=True returns relative frequencies (fractions summing to 1);
# multiplying by 100 expresses them as percentages.
my_table.Customer.value_counts(normalize=True)*100

In [None]:
# Element-wise arithmetic: subtracts 100 from every value and returns a new
# Series (nothing is assigned, so my_table is not modified).
my_table.product1-100

In [None]:
# Deviation of each value from the column mean (the scalar mean is broadcast
# across the whole Series).
my_table.product1 - my_table.product1.mean()

In [None]:
# Add a column holding the length of each Customer value.
# .str.len() is the vectorized string accessor: unlike .apply(len) it yields
# NaN for missing values instead of raising a TypeError.
# Note: this assignment modifies my_table in place, so later cells see the new column.
my_table['Cus_length'] = my_table.Customer.str.len()
my_table.head()

## 3. Indexing in Pandas

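The indexing operator and attribute selection work just like they do in the rest of the Python ecosystem.  
However, pandas has its own accessor operators, <font color='red'><b> loc </b></font> and <font color='red'><b> iloc </b></font>, for more advanced operations.

### 3.1. Index-based selection
Pandas indexing works in one of two paradigms. The first is index-based selection: selecting data by its numerical position in the table. This is the paradigm followed by the <font color='red'><b> iloc </b></font> operator.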


In [None]:
# Preview the table before indexing into it.
my_table.head()

In [None]:
# A single integer selects one row by position (here the second row);
# the result is a Series.
my_table.iloc[1]

In [None]:
# A list of positions - even with a single element - returns a DataFrame.
my_table.iloc[[1]]

In [None]:
# The trailing comma makes the key a one-element tuple (row position only);
# it selects the same row as my_table.iloc[1].
my_table.iloc[1,]

In [None]:
# Positional slices exclude the end point: rows at positions 1-2,
# columns at positions 0-5.
my_table.iloc[1:3,0:6]

In [None]:
# Lists pick arbitrary positions: rows 1, 4 and 12, columns 1, 3 and 7.
my_table.iloc[[1,4,12],[1,3,7]]

In [None]:
# Print the largest product1 value and its integer position, then display the
# full row at that position. argmax() returns a position, so iloc (not loc)
# is the matching accessor.
print(my_table.product1.max())
print(my_table.product1.argmax())
my_table.iloc[[my_table.product1.argmax()]]

In [None]:
# Negative positions count from the end: the last five rows.
my_table.iloc[-5:]

In [None]:
# A plain slice on the DataFrame slices rows, giving the same last five rows
# as my_table.iloc[-5:].
my_table[-5:]

In [None]:
# ':' keeps every row; the column slice keeps positions 1 through 6.
my_table.iloc[:,1:7]

### 3.2. Label-based selection
The <font color='red'><b> loc </b></font> operator follows a different paradigm: label-based selection. In this paradigm, it is the index label of the data, not its position, that matters.

In [None]:
# Preview the table (and its index labels) before selecting by label.
my_table.head()

In [None]:
# loc slices are label-based and include BOTH end points: labels 0, 1, 2 and 3
# (four rows). A single column label returns a Series.
my_table.loc[0:3,'product1']

In [None]:
# Same rows, but a list of column labels returns a DataFrame.
my_table.loc[0:3,['product1']]

In [None]:
# Positional counterpart: iloc excludes the end point, so 0:3 gives only three
# rows (positions 0, 1, 2), one fewer than loc[0:3].
# presumably column position 1 is 'product1' - verify against the CSV header.
my_table.iloc[0:3,[1]]

In [None]:
# Load the same file again, this time using the 'date' column as the row index
# so that rows can be selected by their date label.
my_table_2 = pd.read_csv('salesmonthly.csv', index_col='date') # alternative: index_col=0 (presumably 'date' is the first column - verify)
my_table_2

In [None]:
# Lists of labels on both axes: two specific index labels and two specific columns.
# The index labels are matched as strings (the file was read without date parsing).
my_table_2.loc[['1/31/2014','3/31/2014'],['product1','product5']]

In [None]:
# Label slice on the rows: every row from '1/31/2014' through '3/31/2014',
# both end points included, for the two listed columns.
my_table_2.loc['1/31/2014':'3/31/2014',['product1','product5']]

In [None]:
# Position-based selection: rows at positions 0 and 2, columns at positions 0 and 4.
# presumably these are the same cells as the label-based list selection of
# '1/31/2014'/'3/31/2014' and product1/product5 - verify against the data.
my_table_2.iloc[[0,2],[0,4]]

In [None]:
# Label slices on both axes; with loc both end points are included, for the
# rows ('1/31/2014' through '5/31/2014') and the columns ('product1' through 'product4').
my_table_2.loc['1/31/2014':'5/31/2014','product1':'product4']