In [5]:
import pandas as pd
taxi_owners = pd.read_pickle('taxi_owners.p')
taxi_veh= pd.read_pickle('taxi_vehicles.p')


Merge taxi_owners with taxi_veh on the column vid, and save the result to taxi_own_veh.


In [10]:

# Merge the taxi_owners and taxi_veh tables
taxi_own_veh = taxi_owners.merge(taxi_veh, on="vid")

# Print the column names of the taxi_own_veh
print(taxi_own_veh.columns)

Index(['rid', 'vid', 'owner_x', 'address', 'zip', 'make', 'model', 'year',
       'fuel_type', 'owner_y'],
      dtype='object')


Set the left and right table suffixes for overlapping columns of the merge to _own and _veh, respectively.


In [11]:
# Merge the taxi_owners and taxi_veh tables setting a suffix
taxi_own_veh = taxi_owners.merge(taxi_veh, on='vid', suffixes=('_own', '_veh'))

# Print the column names of taxi_own_veh
print(taxi_own_veh.columns)

Index(['rid', 'vid', 'owner_own', 'address', 'zip', 'make', 'model', 'year',
       'fuel_type', 'owner_veh'],
      dtype='object')


Select the fuel_type column from taxi_own_veh and print the value_counts() to find the most popular fuel_types used.


In [12]:
# Merge the taxi_owners and taxi_veh tables setting a suffix
taxi_own_veh = taxi_owners.merge(taxi_veh, on='vid', suffixes=('_own','_veh'))

# Print the value_counts to find the most popular fuel_type
print(taxi_own_veh['fuel_type'].value_counts())

fuel_type
HYBRID                    2792
GASOLINE                   611
FLEX FUEL                   89
COMPRESSED NATURAL GAS      27
Name: count, dtype: int64


In [13]:
wards = pd.read_pickle('ward.p')
census= pd.read_pickle('census.p')

Merge wards and census on the ward column and save the result to wards_census.

In [15]:

print(wards.columns)
print(census.columns)

# Merge the wards and census tables on the ward column
wards_census = wards.merge(census, on='ward')

# Print the shape of wards_census
print('wards_census table shape:', wards_census.shape)

Index(['ward', 'alderman', 'address', 'zip'], dtype='object')
Index(['ward', 'pop_2000', 'pop_2010', 'change', 'address', 'zip'], dtype='object')
wards_census table shape: (50, 9)


Merge the wards_altered and census tables on the ward column, and notice the difference in returned rows.


In [18]:
wards_altered=  pd.read_pickle('ward.p')

# Print the first few rows of the wards_altered table to view the change 
print(wards_altered[['ward']].head())

# Merge the wards_altered and census tables on the ward column
wards_altered_census = wards_altered.merge(census, on="ward")

print(wards_altered_census.columns)

# Print the shape of wards_altered_census
print('wards_altered_census table shape:', wards_altered_census.shape)

  ward
0    1
1    2
2    3
3    4
4    5
Index(['ward', 'alderman', 'address_x', 'zip_x', 'pop_2000', 'pop_2010',
       'change', 'address_y', 'zip_y'],
      dtype='object')
wards_altered_census table shape: (50, 9)


Merge the wards and census_altered tables on the ward column, and notice the difference in returned rows.


In [20]:
census_altered = pd.read_pickle('census.p')

# Print the first few rows of the census_altered table to view the change 
print(census_altered[['ward']].head())

# Merge the wards and census_altered tables on the ward column
wards_census_altered = wards.merge(census_altered, on='ward')

print(wards_census_altered.columns)

# Print the shape of wards_census_altered
print('wards_census_altered table shape:', wards_census_altered.shape)

  ward
0    1
1    2
2    3
3    4
4    5
Index(['ward', 'alderman', 'address_x', 'zip_x', 'pop_2000', 'pop_2010',
       'change', 'address_y', 'zip_y'],
      dtype='object')
wards_census_altered table shape: (50, 9)


## Understanding Relationships Between Tables

In relational databases, understanding the nature of relationships between tables is crucial. Here are the different types of relationships with the e-commerce example:

1. **One-to-One Relationship**:
    - Each record in the first table corresponds to exactly one record in the second table
    - Example: The customer table and cust_tax_info table have a one-to-one relationship
    - Each customer has exactly one tax ID, and each tax ID belongs to exactly one customer

2. **One-to-Many Relationship**:
    - Each record in the first table corresponds to multiple records in the second table
    - Example: The customer table and orders table have a one-to-many relationship
    - A single customer can place multiple orders, but each order belongs to only one customer

3. **Many-to-Many Relationship**:
    - Records in the first table can relate to multiple records in the second table, and vice versa
    - Example: The orders table and products table have a many-to-many relationship
    - One order can contain multiple products, and one product can appear in multiple orders
    - This relationship typically requires a junction/bridge table to implement

4. **One-to-One or Zero Relationship**:
    - Like one-to-one, but allows for records that don't have a match
    - Example: Some customers might not have tax information yet

Understanding these relationships helps in designing efficient database schemas and performing appropriate joins when querying data.

###  one to one
The relationship between products and inventory .
The relationship between customer and cust_tax_info.
### one to many
The relationship between the products and orders.
The relationship between the customers and orders.

In [22]:
licenses = pd.read_pickle('licenses.p')
biz_owners = pd.read_pickle('business_owners.p')

In [35]:
# Merge the licenses and biz_owners table on account
licenses_owners = licenses.merge(biz_owners, on='account')

print(licenses_owners.columns)

# Group the results by title then count the number of accounts
counted_df = licenses_owners.groupby('title').agg({'account':'count'})

print(counted_df.head())

# Sort the counted_df in descending order
sorted_df = counted_df.sort_values(by='account', ascending=False)

# Use .head() method to print the first few rows of sorted_df
print(sorted_df.head())

Index(['account', 'ward', 'aid', 'business', 'address', 'zip', 'first_name',
       'last_name', 'title'],
      dtype='object')
                    account
title                      
ASST. SECRETARY         111
BENEFICIARY               4
CEO                     110
DIRECTOR                146
EXECUTIVE DIRECTOR       10
                 account
title                   
PRESIDENT           6259
SECRETARY           5205
SOLE PROPRIETOR     1658
OTHER               1200
VICE PRESIDENT       970
