In [275]:
import pandas as pd
from math import ceil

<h2>Banking Analysis</h2>

In [276]:
# importing banking data
filepath1 = 'Resources/budget_data.csv'

pybank = pd.read_csv(filepath1)

pybank.head()

Unnamed: 0,Date,Profit/Losses
0,Jan-2010,867884
1,Feb-2010,984655
2,Mar-2010,322013
3,Apr-2010,-69417
4,May-2010,310503


In [277]:
# total number of months included in the dataset
month_count = pybank['Date'].nunique()

# net total amount of Profit/Losses
net_amount = pybank['Profit/Losses'].sum()

# average of the changes in Profit/Losses
differences = pybank['Profit/Losses'].diff()
avg_changes = round(differences[1:].mean(), 2)

# greatest increase in profits (date and amount) over the entire period
max_profit = pybank.loc[pybank['Profit/Losses'] == pybank['Profit/Losses'].max(), :]
max_profit.set_index('Date', inplace=True)

# greatest decrease in profits (date and amount) over the entire period
max_loss = pybank.loc[pybank['Profit/Losses'] == pybank['Profit/Losses'].min(), :]
max_loss.set_index('Date', inplace=True)

In [279]:
print(f'''
  Financial Analysis
  ----------------------------
  Total Months: {month_count}
  Total: ${net_amount}
  Average  Change: ${avg_changes}
  Greatest Increase in Profits: {max_profit.index[0]} (${max_profit['Profit/Losses'][0]})
  Greatest Decrease in Profits: {max_loss.index[0]} (${max_loss['Profit/Losses'][0]})
  ''')


  Financial Analysis
  ----------------------------
  Total Months: 86
  Total: $38382578
  Average  Change: $-2315.12
  Greatest Increase in Profits: Feb-2012 ($1170593)
  Greatest Decrease in Profits: Sep-2013 ($-1196225)
  


<h2>Election Analysis</h2>

In [280]:
# importing election data
filepath2 = 'Resources/election_data.csv'

pypoll = pd.read_csv(filepath2)
pypoll.head()

Unnamed: 0,Voter ID,County,Candidate
0,12864552,Marsh,Khan
1,17444633,Marsh,Correy
2,19330107,Marsh,Khan
3,19865775,Queen,Khan
4,11927875,Marsh,Khan


In [281]:
# total number of votes
# pd.dataframe.duplicated() returns only one value, FALSE, indicating no duplicate votes

total_votes = pypoll_df['Voter ID'].count()

pypoll_df.duplicated(subset=['Voter ID']).nunique()

1

In [282]:
# percentage of votes won by each candidate
percent_won = pypoll['Candidate'].value_counts(normalize=True)


# number of votes won by each candidate
votes_won = pypoll['Candidate'].value_counts()


# winner of election based on popular vote
votes_won.sort_values(ascending=False)
winner = votes_won.index[0]

In [283]:
print(f'''
  Election Results
  -------------------------
  Total Votes: {total_votes}
  -------------------------
  {votes_won.index[0]}: {percent_won[0].round(2) * 100}% ({votes_won[0]})
  {votes_won.index[1]}: {percent_won[1].round(2) * 100}% ({votes_won[1]})
  {votes_won.index[2]}: {percent_won[2].round(2) * 100}% ({votes_won[2]})
  {votes_won.index[3]}: {percent_won[3].round(2) * 100}% ({votes_won[3]})
  -------------------------
  Winner: {winner}
  -------------------------
  ''')


  Election Results
  -------------------------
  Total Votes: 3521001
  -------------------------
  Khan: 63.0% (2218231)
  Correy: 20.0% (704200)
  Li: 14.000000000000002% (492940)
  O'Tooley: 3.0% (105630)
  -------------------------
  Winner: Khan
  -------------------------
  
