In [1]:
import pandas as pd
from datetime import datetime

In [46]:
# Input CSV paths used by the loading cells below (relative paths, resolved against the working directory).
hist_econ_file = "econ_percentages.csv"  # historical data, loaded into hist_df
cur_econ_file = "current_econ_data.csv"  # current-period indicators, loaded into curr_df
states_file = "states.csv"  # per-state rows, loaded into states_df

## Economic Data
* Current and historical data shows indicators as a percentage change over the 12-month moving average
* Current conditions are:
* Inflation: 0.0047 in January 2018. -0.12766% change over 12-month moving average
* Unemployment: 4.1% - a -0.1132 change (about -11.3%) over the 12-month moving average
* TBill Rate: 1.89% - a 1.277% change over the 12-month moving average
* National E-Retail Sales: 67,076,000,000 - a 0.661778% change over the 12 month moving average

In [70]:
# Load the current-period indicators and trim stray whitespace from the header names.
curr_df = (
    pd.read_csv(cur_econ_file)
    .rename(columns=str.strip)
)
curr_df

Unnamed: 0.1,Unnamed: 0,inflation,unemployment,tbill,natsales
0,2017-01,0.0053,0.048,0.0089,40364.0
1,2017-02,0.0053,0.047,0.0083,37891.0
2,2017-03,0.0053,0.045,0.0092,44061.0
3,2017-04,0.0052,0.044,0.0102,41275.0
4,2017-05,0.0052,0.043,0.0108,44373.0
5,2017-06,0.0051,0.043,0.0116,43294.0
6,2017-07,0.0051,0.043,0.0124,42584.0
7,2017-08,0.0051,0.044,0.0122,44733.0
8,2017-09,0.0051,0.041,0.0124,42021.0
9,2017-10,0.005,0.041,0.013,43925.0


In [41]:
# Load the historical data and trim stray whitespace from the header names,
# then print the column dtypes as a quick schema check.
hist_df = (
    pd.read_csv(hist_econ_file)
    .rename(columns=str.strip)
)
print(hist_df.dtypes)

date               object
dc_sales            int64
dc_profits          int64
inflation         float64
unemployment      float64
tbill             float64
natsales            int64
dc_sales_chg      float64
dc_profits_chg    float64
natsales_chg      float64
dtype: object


## Comparing sales at time periods with similar economic conditions
### Which months had similar inflation?
* Look for months within ±5% of the current value

In [103]:
# Keep the historical months whose `inflation` value lies strictly inside a
# +/-5% band around 0.00407.
# NOTE(review): the "Economic Data" notes quote current inflation as 0.0047,
# not 0.00407 - confirm which reference value is intended.
months = hist_df.query(
    'inflation < (0.00407 * 1.05) & inflation > (0.00407 * 0.95)'
)
months

Unnamed: 0,date,dc_sales,dc_profits,inflation,unemployment,tbill,natsales,dc_sales_chg,dc_profits_chg,natsales_chg
21,2010-03-01,238070141,28654790,0.0041,0.099,0.0373,21630,0.116046,0.095777,0.174267


### Results
It looks like only one month (2010-03) was within 5% of the current inflation rate of change. In that month, company-wide sales and profit growth were good.
How did different states perform during this month?

In [63]:
# Read the per-state file with explicit column names (so the file's first row
# is read as data, not as a header), then discard the `date2` column.
states_df = pd.read_csv(
    states_file,
    sep=',',
    names=[
        "date", "state", "dc_sales", "dc_profits", "date2",
        "inflation", "unemployment", "tbill", "natsales",
    ],
)
sdf = states_df.drop(columns=['date2'])

In [67]:
# State-level rows for 2010-03-01, ordered from lowest to highest dc_sales.
(
    sdf
    .query('date == "2010-03-01"')
    .sort_values(by='dc_sales', ascending=True)
)

Unnamed: 0,date,state,dc_sales,dc_profits,inflation,unemployment,tbill,natsales
2508,2010-03-01,DE,42636,3489,0.0041,0.099,0.0373,21630
2538,2010-03-01,RI,51677,7407,0.0041,0.099,0.0373,21630
2529,2010-03-01,NH,55338,7291,0.0041,0.099,0.0373,21630
2507,2010-03-01,DC,78344,1312,0.0041,0.099,0.0373,21630
2531,2010-03-01,NM,124727,12410,0.0041,0.099,0.0373,21630
2521,2010-03-01,ME,164493,18527,0.0041,0.099,0.0373,21630
2549,2010-03-01,WY,186522,29508,0.0041,0.099,0.0373,21630
2530,2010-03-01,NJ,202001,28001,0.0041,0.099,0.0373,21630
2545,2010-03-01,VT,218305,31497,0.0041,0.099,0.0373,21630
2520,2010-03-01,MD,233884,28992,0.0041,0.099,0.0373,21630


## Analysis
* High-performers: CA, IA, IL, MN, KS
* CA is expected, but IA, IL, MN, and KS are surprising.

- Shipping costs might be lower to IA, IL, and MN since they are relatively close to one another; there may be a distribution center nearby.
  

In [104]:
# State-level rows for 2010-03-01, ordered from lowest to highest dc_profits.
(
    sdf
    .query('date == "2010-03-01"')
    .sort_values(by='dc_profits', ascending=True)
)

Unnamed: 0,date,state,dc_sales,dc_profits,inflation,unemployment,tbill,natsales
2507,2010-03-01,DC,78344,1312,0.0041,0.099,0.0373,21630
2508,2010-03-01,DE,42636,3489,0.0041,0.099,0.0373,21630
2529,2010-03-01,NH,55338,7291,0.0041,0.099,0.0373,21630
2538,2010-03-01,RI,51677,7407,0.0041,0.099,0.0373,21630
2531,2010-03-01,NM,124727,12410,0.0041,0.099,0.0373,21630
2521,2010-03-01,ME,164493,18527,0.0041,0.099,0.0373,21630
2530,2010-03-01,NJ,202001,28001,0.0041,0.099,0.0373,21630
2520,2010-03-01,MD,233884,28992,0.0041,0.099,0.0373,21630
2549,2010-03-01,WY,186522,29508,0.0041,0.099,0.0373,21630
2545,2010-03-01,VT,218305,31497,0.0041,0.099,0.0373,21630


### Months with a similar change in national e-retail
* Holiday sales cause large percentage changes, so get the annual average % change.

In [92]:
current_natsales = pd.read_csv('current_natsales.csv')
# Change of each `Natsales` value relative to the value 11 rows earlier;
# the cell then displays the mean of those changes.
# NOTE(review): if the file holds one row per month, a 12-month comparison
# would need periods=12 - confirm that 11 is intended.
a = current_natsales['Natsales'].pct_change(periods=11)
a.mean()

0.16505866291185725

* There were no months within 5%, but there are some within 10%

In [105]:
# Historical months whose `natsales_chg` lies strictly inside a +/-10% band
# around 0.16505866291185725 (the mean displayed by the previous cell, pasted
# here as a literal - it must be updated by hand if that cell's result changes).
months_nat_sales = hist_df.query(
    'natsales_chg < (0.16505866291185725 * 1.1) & natsales_chg > (0.16505866291185725 * 0.9)'
)
months_nat_sales

Unnamed: 0,date,dc_sales,dc_profits,inflation,unemployment,tbill,natsales,dc_sales_chg,dc_profits_chg,natsales_chg
21,2010-03-01,238070141,28654790,0.0041,0.099,0.0373,21630,0.116046,0.095777,0.174267
33,2011-03-01,495223720,61241948,0.0098,0.09,0.0341,23814,0.12409,0.149123,0.152327


## Results
2010-03-01 matches our current economic conditions on two criteria (inflation and the change in national e-retail sales). It differs greatly in unemployment and the T-Bill rate, so let's look for months that match on those criteria.

Unemployment hasn't been this low in a long time, so there are no months with a similar rate. We can, however, look for months with a similar change over the annual moving average: -0.1132.

In [108]:
# Change in unemployment relative to the value 11 rows earlier.
# NOTE(review): if hist_df holds one row per month, a 12-month comparison
# would need periods=12 - confirm that 11 is intended.
unempl_chg = hist_df['unemployment'].pct_change(periods=11)
# Adds the column to hist_df in place, so the queries in later cells can filter on it.
hist_df['unempl_chg'] = unempl_chg

In [112]:
# Historical months whose `unempl_chg` lies strictly inside a +/-10% band around -0.1132.
# The target is negative, so -0.1132 * 1.1 is the lower bound and -0.1132 * 0.9 the upper bound.
months_unempl = hist_df.query(
    'unempl_chg > (-0.1132 * 1.1) & unempl_chg < (-0.1132 * 0.9)'
)
months_unempl

Unnamed: 0,date,dc_sales,dc_profits,inflation,unemployment,tbill,natsales,dc_sales_chg,dc_profits_chg,natsales_chg,unempl_chg
40,2011-10-01,588383976,70765687,-0.0021,0.088,0.0215,24091,0.017367,0.012727,0.027466,-0.102041
51,2012-09-01,433581516,53122938,0.0045,0.078,0.0172,25451,0.002651,0.005194,-0.070419,-0.113636


In [116]:
# Historical months whose `tbill` value lies strictly inside a +/-5% band
# around 0.0189 (the current 1.89% rate quoted in the "Economic Data" notes).
months_tbill = hist_df.query(
    'tbill < (0.0189 * 1.05) & tbill > (0.0189 * 0.95)'
)
months_tbill

Unnamed: 0,date,dc_sales,dc_profits,inflation,unemployment,tbill,natsales,dc_sales_chg,dc_profits_chg,natsales_chg,unempl_chg
39,2011-09-01,578340059,69876357,0.0015,0.09,0.0197,23447,-0.028624,-0.040238,-0.039962,-0.042553
42,2011-12-01,586763385,71926856,-0.0025,0.085,0.0197,36937,0.012919,0.014795,0.263322,-0.065934
43,2012-01-01,836933916,101907073,0.0044,0.083,0.0197,24739,0.426357,0.416815,-0.330238,-0.077778
44,2012-02-01,822664380,99284441,0.0044,0.083,0.0197,24202,-0.01705,-0.025736,-0.021707,-0.077778
47,2012-05-01,941270911,113246829,-0.0012,0.082,0.018,26384,0.036425,0.016973,0.079895,-0.098901
55,2013-01-01,1302797200,162573914,0.003,0.08,0.0191,28098,1.627357,1.688071,-0.294747,-0.036145
56,2013-02-01,1352358644,169192803,0.0082,0.077,0.0198,25253,0.038042,0.040713,-0.101253,-0.060976
57,2013-03-01,1474620822,184265051,0.0026,0.075,0.0196,27283,0.090407,0.089083,0.080386,-0.085366
59,2013-05-01,3206820724,193215556,0.0018,0.075,0.0194,28055,1.074615,0.005998,0.023868,-0.085366


In [121]:
# Change in the T-Bill rate relative to the value 11 rows earlier, stored on
# hist_df (in place) as its own column so later queries can filter on it.
# The column must be filled from `tbill_chg`: assigning `unempl_chg` here would
# make `tbill_chg` a copy of the unemployment change, and any filter on it
# would silently test the wrong indicator. Results derived from this column
# should be re-run after this cell.
# NOTE(review): if hist_df holds one row per month, a 12-month comparison
# would need periods=12 - confirm that 11 is intended.
tbill_chg = hist_df['tbill'].pct_change(periods=11)
hist_df['tbill_chg'] = tbill_chg

## Results
There were no months with a similar % change in the T-Bill rate, but there were 9 with a similar rate. 2011-09 has a similar T-Bill rate and 2011-10 has a similar change in the unemployment rate, so this two-month span may be worth a look.

Overall, we have identified 2010-03 as one month of interest and 2011-09 - 2011-10 as also of possible interest. How did we perform during these months?

In [133]:
# State-level rows for 2011-09-01, ordered from lowest to highest dc_profits.
(
    sdf
    .query('date == "2011-09-01"')
    .sort_values(by='dc_profits', ascending=True)
)

Unnamed: 0,date,state,dc_sales,dc_profits,inflation,unemployment,tbill,natsales
3438,2011-09-01,RI,20572,4845,0.0015,0.09,0.0197,23447
3408,2011-09-01,DE,116250,17617,0.0015,0.09,0.0197,23447
3429,2011-09-01,NH,235812,20307,0.0015,0.09,0.0197,23447
3407,2011-09-01,DC,183534,22173,0.0015,0.09,0.0197,23447
3406,2011-09-01,CT,373562,38773,0.0015,0.09,0.0197,23447
3445,2011-09-01,VT,417084,41771,0.0015,0.09,0.0197,23447
3419,2011-09-01,MA,451835,59511,0.0015,0.09,0.0197,23447
3449,2011-09-01,WY,442014,62378,0.0015,0.09,0.0197,23447
3439,2011-09-01,SC,594498,65174,0.0015,0.09,0.0197,23447
3430,2011-09-01,NJ,771018,88155,0.0015,0.09,0.0197,23447


In [134]:
# State-level rows for 2011-10-01, ordered from lowest to highest dc_profits.
(
    sdf
    .query('date == "2011-10-01"')
    .sort_values(by='dc_profits', ascending=True)
)

Unnamed: 0,date,state,dc_sales,dc_profits,inflation,unemployment,tbill,natsales
3458,2011-10-01,DE,108235,8654,-0.0021,0.088,0.0215,24091
3488,2011-10-01,RI,66819,10931,-0.0021,0.088,0.0215,24091
3457,2011-10-01,DC,192788,26301,-0.0021,0.088,0.0215,24091
3479,2011-10-01,NH,250754,30049,-0.0021,0.088,0.0215,24091
3499,2011-10-01,WY,357796,36886,-0.0021,0.088,0.0215,24091
3495,2011-10-01,VT,480650,54023,-0.0021,0.088,0.0215,24091
3481,2011-10-01,NM,534772,65237,-0.0021,0.088,0.0215,24091
3469,2011-10-01,MA,648725,78694,-0.0021,0.088,0.0215,24091
3471,2011-10-01,ME,698203,92006,-0.0021,0.088,0.0215,24091
3489,2011-10-01,SC,639907,96979,-0.0021,0.088,0.0215,24091


## Observations
* Indiana and Kentucky are regularly more profitable than Ohio and Pennsylvania
* Some of the more populous states like NY, VA, NC, and Florida are consistent under-performers

## Questions
* What is selling in CA, MN, IA, IL, KS? What is most profitable there?
* How are those products selling in our most populous and tech-heavy underperformers, MA, NY, VA, NC, OH, PA, FL?

## Findings

Which products have sold the most?
```
SELECT p.name, SUM(p.price) AS sales, SUM(p.cost) AS cost, SUM(p.price-p.cost) AS profits FROM fact f LEFT JOIN product p ON f.prod_id = p.prod_id
GROUP BY p.name ORDER BY sales DESC;
Tablet PC (10 in. display, 64 GB)                                  | 4902069145 | 4851254985 |  50814160 |
| 173 GB SAS Disk                                                    |  818620025 |  712633114 | 105986911 |
| VPN Appliance (250 Clienti license)                                |  806135927 |  656986577 | 149149350 |
| Server (1U rackmount, hex-core, 16GB, 8TB)                         |  768481065 |  619406670 | 149074395 |
| Basic Desktop                                                      |  700655080 |  643103842 |  57551238

```

Which products have been the most profitable?

```
SELECT p.name, SUM(p.price) AS sales, SUM(p.cost) AS cost, SUM(p.price-p.cost) AS profits FROM fact f LEFT JOIN product p ON f.prod_id = p.prod_id
GROUP BY p.name ORDER BY profits DESC;

VPN Appliance (250 Clienti license)                                |  806135927 |  656986577 | 149149350 |
| Server (1U rackmount, hex-core, 16GB, 8TB)                         |  768481065 |  619406670 | 149074395 |
| 173 GB SAS Disk                                                    |  818620025 |  712633114 | 105986911 |
| Scanner                                                            |  459446737 |  370731095 |  88715642 |
| VPN Appliance (50 Clienti license)                                 |  689302223 |  628122638 |  61179585 |

```

Our consumer-grade tech (like tablets) seems to be far less profitable than our professional-grade equipment (like VPN appliances).

What about during the month whose economic conditions resemble today's (2010-03)?

```
SELECT p.name, SUM(p.price) AS sales, SUM(p.cost) AS cost, SUM(p.price-p.cost) AS profits FROM fact f LEFT JOIN product p ON f.prod_id = p.prod_id
WHERE f.Date = '2010-03-01'
GROUP BY p.name ORDER BY sales DESC
LIMIT 5;

VPN Appliance (250 Clienti license)                                | 685927 | 557001 |  128926 |
| Tablet PC (10 in. display, 64 GB)                                  | 543566 | 535082 |    8484 |
| Server (1U rackmount, hex-core, 16GB, 8TB)                         | 470019 | 378842 |   91177 |
| 173 GB SAS Disk                                                    | 292624 | 249786 |   42838 |
| Scanner                                                            | 279870 | 223959 |   55911 |


```

Most profitable products during the month with a similar change in inflation:

```
+--------------------------------------------+--------+--------+---------+
| name                                       | sales  | cost   | profits |
+--------------------------------------------+--------+--------+---------+
| VPN Appliance (250 Clienti license)        | 685927 | 557001 |  128926 |
| Server (1U rackmount, hex-core, 16GB, 8TB) | 470019 | 378842 |   91177 |
| Scanner                                    | 279870 | 223959 |   55911 |
| 173 GB SAS Disk                            | 292624 | 249786 |   42838 |
| VPN Appliance (50 Clienti license)         | 196416 | 172258 |   24158 |
+--------------------------------------------+--------+--------+---------+


```

What about months with similar T-Bill rates?

```
SELECT p.name, SUM(p.price) AS sales, SUM(p.cost) AS cost, SUM(p.price-p.cost) AS profits FROM fact f LEFT JOIN product p ON f.prod_id = p.prod_id
WHERE (f.Date = '2011-09-01' OR f.Date = '2011-12-01' OR f.Date = '2012-01-01' OR f.Date = '2012-02-01' OR f.Date = '2012-05-01' OR f.Date = '2013-01-01' OR f.Date = '2013-02-01' OR f.Date = '2013-03-01' OR f.Date = '2013-05-01')
GROUP BY p.name ORDER BY sales DESC;


Tablet PC (10 in. display, 64 GB)                                  | 47361352 | 46937004 |  424348 |
| Server (1U rackmount, hex-core, 16GB, 8TB)                         |  7520304 |  6061472 | 1458832 |
| 173 GB SAS Disk                                                    |  5893579 |  5102588 |  790991 |
| Premium Gamer Desktop                                              |  5685636 |  5631736 |   53900 |
| Basic Desktop                                                      |  5673546 |  5273212 |  400334 |


```

Most profitable products during months with similar T-Bill rates:

```
SELECT p.name, SUM(p.price) AS sales, SUM(p.cost) AS cost, SUM(p.price-p.cost) AS profits FROM fact f LEFT JOIN product p ON f.prod_id = p.prod_id
WHERE (f.Date = '2011-09-01' OR f.Date = '2011-12-01' OR f.Date = '2012-01-01' OR f.Date = '2012-02-01' OR f.Date = '2012-05-01' OR f.Date = '2013-01-01' OR f.Date = '2013-02-01' OR f.Date = '2013-03-01' OR f.Date = '2013-05-01')
GROUP BY p.name ORDER BY profits DESC
LIMIT 5;


| name                                       | sales   | cost    | profits |
+--------------------------------------------+---------+---------+---------+
| Server (1U rackmount, hex-core, 16GB, 8TB) | 7520304 | 6061472 | 1458832 |
| VPN Appliance (250 Clienti license)        | 4578340 | 3738508 |  839832 |
| 173 GB SAS Disk                            | 5893579 | 5102588 |  790991 |
| Scanner                                    | 3614481 | 2911039 |  703442 |
| VPN Appliance (50 Clienti license)         | 5530738 | 5035792 |  494946 |
+--------------------------------------------+---------+---------+---------+
```

## Recommendations
* Focus on marketing to our underperformers along the East Coast: FL, NC, VA, PA, NY, MA. 
* Focus on reaching computing professionals, not the consumer market.
* Sponsor tech conferences, especially those favored by sysadmins.
* Advertise on StackOverflow's sections that are favored by sysadmins.
* Get involved in professional organizations for CIOs and CTOs along the East Coast.