# 基本思路：
    1、获取stockbasic和stockearningreport的全量数据
    2、stockbasic根据所在市场进行过滤，只要主板和中小板的
    3、对stockearningreport进行筛选，条件为：
        3.1 基于2中的主板和中小板数据
        3.2 只要最近3年的数据
        3.3 仅要年报（1231）这一天
    4、完成后，stockearningreport根据code进行分组（groupby），并获取net_profit_yoy_rise的最小值、均值和最大值
    5、根据groupby后的net_profit_yoy_rise最小值进行筛选，暂定条件为大于30%（下方代码实际使用的阈值为20）

In [1]:
from sqlalchemy.orm import Session
from dataUpdate import engine
from dataUpdate.model.basicinfo import StockEarningReport,StockBasic
import pandas as pd

In [2]:
# Open an ORM session bound to the project's engine.
sess = Session(bind=engine)

In [3]:
# 1. Load every earnings-report row into a DataFrame (no filter is applied
#    at the SQL level) and preview the first three rows.
sql = sess.query(StockEarningReport)
res_df = pd.read_sql(sql=sql.statement, con=sess.bind)
res_df.head(3)

Unnamed: 0,id,code,earning_per_share,revenue,revenue_yoy_rise,revenue_sos_rise,net_profit,net_profit_yoy_rise,net_profit_sos_rise,return_on_equity,operating_cash_flow_per_share,gross_profit_ratio,year,date
0,1,617,-0.04,304662500.0,10.749529,-47.6992,-10572685.25,-134.422353,-231.3498,-1.27,-0.1,3.111316,2010,331
1,2,757,-0.08,131920400.0,101.189055,16.3653,-25948235.48,16.468424,-146.0735,0.0,-0.0423,12.714494,2010,331
2,3,301041,,70304120.0,,,4112270.94,,,,-0.169109,15.906955,2010,331


In [14]:
# 2. Fetch symbol, name, circulating market cap and sw_l1 for the stocks
#    whose market is one of the wanted boards.
wanted_markets = ['主板', '中小板']
sql2 = sess.query(
    StockBasic.symbol,
    StockBasic.name,
    StockBasic.circulating_market_cap,
    StockBasic.sw_l1,
).filter(StockBasic.market.in_(wanted_markets))
res_df2 = pd.read_sql(sql=sql2.statement, con=sess.bind)
res_df2.head(3)

Unnamed: 0,symbol,name,circulating_market_cap,sw_l1
0,600000,浦发银行,3137.74,银行I
1,600004,白云机场,253.492,交通运输I
2,600006,东风汽车,128.4,汽车I


In [5]:
# 3. Keep only the 12-31 (annual) reports for 2018-2020 whose code appears
#    among the symbols selected in step 2.
in_target_years = res_df['year'].isin([2018, 2019, 2020])
is_annual = res_df['date'] == '1231'
in_target_markets = res_df['code'].isin(res_df2['symbol'])
annul_report = res_df[in_target_years & is_annual & in_target_markets]
annul_report.shape

(9327, 14)

In [6]:
# Spot-check the filtered rows for a single stock code.
annul_report.loc[annul_report['code'] == '601919']

Unnamed: 0,id,code,earning_per_share,revenue,revenue_yoy_rise,revenue_sos_rise,net_profit,net_profit_yoy_rise,net_profit_sos_rise,return_on_equity,operating_cash_flow_per_share,gross_profit_ratio,year,date
192670,192671,601919,0.12,120829500000.0,33.566485,4.4383,1230026000.0,-53.792034,-55.2878,5.52,0.795865,8.462042,2018,1231
225298,225299,601919,0.56,151056700000.0,25.016363,-0.1942,6764105000.0,449.92,428.7379,21.57,1.72946,10.742339,2019,1231
258685,258686,601919,0.81,171258800000.0,13.373888,22.6643,9927098000.0,46.76,122.8038,25.05,3.673106,14.208792,2020,1231


In [7]:
# 4. Per stock code, compute the min / mean / max of net_profit_yoy_rise.
# NOTE(review): nothing here checks how many annual rows each code has,
# so codes with fewer than three rows are aggregated as well.
yoy_stats = {'net_profit_yoy_rise': ['min', 'mean', 'max']}
annul_report_groupby = annul_report.groupby(by='code').agg(yoy_stats)

In [17]:
# 5. strategy1: keep the codes whose smallest net_profit_yoy_rise across the
#    selected annual reports is above the threshold.
#    The threshold in effect is 20 (the recorded results were produced with
#    it); the outline at the top of the file tentatively mentions 30.
MIN_YOY_RISE = 20
# Address the ('net_profit_yoy_rise', 'min') column with a single tuple key
# instead of chained [...][...] indexing.
min_yoy_rise = annul_report_groupby[('net_profit_yoy_rise', 'min')]
res1 = annul_report_groupby[min_yoy_rise > MIN_YOY_RISE]

In [18]:
# (rows, columns) of the filtered statistics: one row per code that passed.
res1.shape

(197, 3)

In [19]:
# Preview the first rows of the filtered min / mean / max statistics.
res1.head()

Unnamed: 0_level_0,net_profit_yoy_rise,net_profit_yoy_rise,net_profit_yoy_rise
Unnamed: 0_level_1,min,mean,max
code,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
9,40.86,73.7,119.75
20,25.09,109.651962,238.155887
37,21.061024,69.170341,157.12
49,25.12,30.677186,33.551559
90,20.22,36.100641,58.17


In [20]:
# Attach the name, circulating_market_cap and sw_l1 columns to the selected
# codes.
# res1 carries two-level columns (net_profit_yoy_rise / min|mean|max) while
# res_df2 has flat columns. Merging frames with different column depths only
# warns on older pandas and is rejected by pandas 2.x, so flatten res1's
# columns to plain tuple labels first. The resulting column names stay
# ('net_profit_yoy_rise', 'min') etc., exactly as before.
# A copy is used so that res1 itself keeps its MultiIndex columns.
res1_flat = res1.copy()
res1_flat.columns = res1.columns.to_flat_index()
# 'code' is the index level produced by the groupby; merge accepts an index
# level name in left_on.
res2 = pd.merge(
    left=res1_flat,
    right=res_df2,
    left_on='code',
    right_on='symbol',
    how='inner',
)



In [21]:
# Order by circulating market cap, largest first, then preview the top rows.
res2 = res2.sort_values(by='circulating_market_cap', ascending=False)
res2.head()

Unnamed: 0,"(net_profit_yoy_rise, min)","(net_profit_yoy_rise, mean)","(net_profit_yoy_rise, max)",symbol,name,circulating_market_cap,sw_l1
144,22.285194,34.811731,49.58,601888,中国国旅,7572.87,休闲服务I
8,29.38,32.939113,36.267338,568,泸州老窖,4597.78,食品饮料I
111,36.25,103.936743,192.33023,600031,三一重工,4066.41,机械设备I
132,28.63,46.34195,56.39,600809,山西汾酒,3840.38,食品饮料I
67,53.28,62.486099,73.13,2475,立讯精密,3701.54,电子I


In [22]:
# Count how many selected stocks fall under each sw_l1 value.
res2.sw_l1.value_counts()

机械设备I    25
化工I      24
医药生物I    18
电子I      16
公用事业I    13
计算机I     12
食品饮料I    11
电气设备I    10
房地产I      9
轻工制造I     9
建筑装饰I     6
建筑材料I     6
有色金属I     5
传媒I       5
交通运输I     4
家用电器I     4
采掘I       3
汽车I       3
国防军工I     2
非银金融I     2
综合I       2
纺织服装I     2
商业贸易I     2
通信I       2
钢铁I       1
休闲服务I     1
Name: sw_l1, dtype: int64

In [24]:
# Show the selected stocks whose sw_l1 is '医药生物I'.
res2.loc[res2['sw_l1'] == '医药生物I']

Unnamed: 0,"(net_profit_yoy_rise, min)","(net_profit_yoy_rise, mean)","(net_profit_yoy_rise, max)",symbol,name,circulating_market_cap,sw_l1
119,20.25,27.831366,41.624098,600436,片仔癀,2376.83,医药生物I
14,52.050341,66.683447,76.36,661,长春高新,1784.05,医药生物I
179,23.774388,123.818129,275.24,603882,金域医学,764.358,医药生物I
94,25.493984,28.394661,30.37,2821,凯莱英,733.805,医药生物I
185,30.58,34.898652,41.29,603939,益丰药房,545.971,医药生物I
151,41.718019,58.342673,68.67,603127,昭衍新药,366.103,医药生物I
171,33.25,36.953678,42.5,603707,健友股份,317.765,医药生物I
17,44.418654,47.112885,49.34,739,普洛药业,278.338,医药生物I
125,23.06,29.145638,36.326914,600529,山东药玻,245.662,医药生物I
97,25.32,27.039769,30.12,2901,大博医疗,188.69,医药生物I
