In [None]:
import numpy as np
import pandas as pd
pd.__version__

<p class="lead display-3 alert alert-info">1. Pandas <strong>Objects</strong> can be thought of as NumPy arrays with custom structured dtypes.</p>

<h1>Three kinds of Pandas data structures:</h1>

<h3 class="display-4">Series Objects</h3>
<blockquote>A Pandas Series is a one-dimensional array of indexed data. It can be created from a list or array</blockquote>

<h3 class="display-4">DataFrame Objects</h3>
<blockquote>DataFrame is an analog of a two-dimensional array with both flexible row indices and flexible column names</blockquote>

<h3 class="display-4">Index Objects</h3>
<blockquote>A Pandas Index is an immutable array of labels that Series and DataFrame objects use to reference their data. It can be created explicitly from a list or array.</blockquote>

In [None]:
# Build a Series from five evenly spaced floats running from 1 to 2.
# `data` supplies the values; `name` is the label shown with the Series.
my_series = pd.Series(
    name="Values b/w 1 to 2",
    data=np.linspace(start=1, stop=2, num=5),
)

In [None]:
# Explore the series created earlier: its printed form, raw values,
# index, and a positional slice.

print("The series as it looks:", end="\n\n")
print(my_series, end="\n\n")

# `.values` exposes the underlying data as a NumPy array, not a Python list.
print("Values property of the object: All values will be shown as a NumPy array")
print(my_series.values, end="\n\n")

print("Index of this series:")
print(my_series.index, end="\n\n")

# `.iloc` makes it explicit that the slice is positional: it selects
# positions 1 and 2 (the end position 3 is excluded).
print("Let's get a few values using positional slicing (positions 1 and 2)")
print(my_series.iloc[1:3])

<h3>Above we can note a few things</h3>
<ul class="list-group">
    <li class="list-group-item">If you provide a name too, it shows up. Else it's None.</li>
    <li class="list-group-item">Index by default, starts from 0.</li>
    <li class="list-group-item">We can get just the values (without the index) with the <code>values</code> property.</li>
    <li class="list-group-item">Sliced series will also show indexes</li>
</ul>

<p class="lead display-3 alert alert-info">2. We can have our own index range</p>

<ul class="list-group">
    <li class="list-group-item">It can start with anything like a letter, or a number</li>
    <li class="list-group-item">Unlike Python list indexing, which only accepts integer positions, you can then use these custom labels to index and slice the Series</li>
</ul>

In [None]:
# A Series with explicit string labels instead of the default integer index.
# The values come from a seeded generator so the printed numbers are
# identical on every Restart & Run All.
series_with_str_index = pd.Series(
    np.random.default_rng(seed=42).random(5),
    index=['a', 'b', 'c', 'd', 'e'],
)
print(series_with_str_index.index)
# Label-based slices include both endpoints: this prints rows 'b', 'c' and 'd'.
print(series_with_str_index['b':'d'])

<p class="lead display-3 alert alert-info">3. Series as specialized dictionary.</p>

In [None]:
# Build a pandas Series from a plain Python dictionary: the keys become
# the index labels and the values become the data.

# NOTE(review): 'weigth' looks like a typo for 'weight'; the key is left
# unchanged because it is runtime data that other cells may rely on.
person_dict = {'name':'Jaddu', 'age':40, 'weigth': 68.5}

print("The Series:", end='\n\n')
dict_series = pd.Series(person_dict)
print(dict_series, end='\n\n')

print("Index of this series")
print(dict_series.index, end='\n\n')

# Label-based slice: both the start and the end label are included.
print("Slicing:")
print(dict_series['name':'age'], end='\n\n')

# The original note claimed the keys are sorted behind the scenes. Modern
# pandas (>= 0.23 on Python >= 3.6) keeps the dictionary's insertion order.
print("Note: The index keys keep the dictionary's insertion order (they are not sorted), and label slicing follows that order.")

<h3>We can create Series from one value too</h3>
<p>If we provide an index list too, value will be the same for every index</p>

In [None]:
# A lone scalar with no explicit index produces a one-element Series.
pd.Series(data='a')

In [None]:
# With an explicit index, the scalar is repeated once per index label.
pd.Series(
    data='a',
    index=[0, 1, 2],
)

<h3>We can keep the values of only selected indexes (with a dictionary as data)</h3>


In [None]:
# Passing `index` alongside dictionary data keeps only the listed keys.
pd.Series(
    data=person_dict,
    index=['name', 'age'],
)

<h1 class="alert alert-info">Dataframes</h1>

<p> You can say that it is a collection of series with a generalised index for easy access to the data.</p>

In [None]:
# Build a DataFrame out of several Series that share the same item labels.

# Raw per-item attributes, keyed by grocery item.
price_grocery = dict(rice=12, eggs=5.5, milk=3, coke=2.99)
category_grocery = dict(rice='Staple', eggs='Poultry', coke='Beverages', milk='Milk Product')
stock_grocery = dict(rice=100, eggs=200, coke=50, milk=10)

# One Series per attribute; the dictionary keys become the index labels.
price_series, category_series, stock_series = map(
    pd.Series, (price_grocery, category_grocery, stock_grocery)
)

# Each dictionary key below becomes a column name; the Series are aligned
# on their item labels to form the rows.
grocery_df = pd.DataFrame(
    data={
        'unit_price': price_series,
        'category': category_series,
        'stock': stock_series,
    }
)
grocery_df

<blockquote>Thus DataFrame can be thought of as a generalization of a two-dimensional NumPy array, where both the rows and columns have a generalized index for accessing the data.</blockquote>

In [None]:
# Inspect the two label axes of the DataFrame.
# Printed first: the row labels (index); printed second: the column labels.
print(grocery_df.index, grocery_df.columns, sep="\n")

<blockquote><mark>TIP:</mark> We can also think of a DataFrame as a specialization of a dictionary. Where a dictionary maps a key to a value, a DataFrame maps a column name to a Series of column data.</blockquote>

<h3>Accessing Data</h3>

In [None]:
# Dictionary-style lookup: a column name maps to that column's Series.
grocery_df["stock"]

<h2>Constructing DataFrame objects</h2>

In [None]:
# A single Series is enough to build a one-column DataFrame.
# `columns` provides the label for that one column.
pd.DataFrame(
    data=price_series,
    columns=['price'],
)

In [None]:
# We can create from a list of dicts: each dictionary becomes one row
# and the dictionary keys become the columns.
pd.DataFrame(
    data=[price_grocery, category_grocery, stock_grocery],
)

In [None]:
# From a two-dimensional NumPy array, with explicit column and row labels.
# The values come from a seeded generator so the displayed numbers are
# identical on every Restart & Run All.
pd.DataFrame(
    np.random.default_rng(seed=42).random((3, 2)),
    columns=['Home', 'Away'],
    index=['Dev1', 'Dev2', 'Dev3'],
)

In [None]:
# From a NumPy structured array: each named field of the compound dtype
# ('height' as f8, 'count' as int8) becomes a DataFrame column.
pd.DataFrame(
    data=np.ones(
        shape=3,
        dtype=[('height', 'f8'), ('count', 'int8')],
    )
)

<h3>Finally, you can create an index explicitly</h3>


In [None]:
# An Index can be constructed directly from a list of labels.
my_index = pd.Index(data=[2, 5, 7, 9, 11])

# Immutability demo: the item assignment below is expected to raise
# TypeError, which is caught so the notebook keeps running.
try:
    my_index[2] = 123
except TypeError:
    print("Yup...cant do it")

# Like a NumPy array, an Index exposes ndim, shape, dtype and size.
tuple(
    getattr(my_index, attribute)
    for attribute in ("ndim", "shape", "dtype", "size")
)
